{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 仓储需求预测分析2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "数据下载地址（和之前的数据是一样的，包含了原始赛题与数据说明，此数据只包含了部分赛题数据）：https://pan.baidu.com/s/1R4dgFyDruxBrz-jWbMN8qw\n",
    "\n",
    "本案例简化了原有的赛题：这里我们预测15年10月份最后一个星期的销量，只考虑全国总仓销量，对应的目标字段为：\"qty_alipay_njhs\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据的导入"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from collections import Counter\n",
    "import pylab\n",
    "import matplotlib.pyplot as plt\n",
    "import lightgbm as lgb\n",
    "import xgboost as xgb\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.ensemble import GradientBoostingRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the raw item-level feature table.\n",
    "ITEM_FEATURE_CSV = r\"F:\\data\\tianchi\\wuliu\\part2\\item_feature2.csv\"\n",
    "\n",
    "# Column names in file order; they are passed explicitly via `names=`.\n",
    "ITEM_FEATURE_COLUMNS = [\n",
    "    \"date\", \"item_id\", \"cate_id\", \"cate_level_id\", \"brand_id\", \"supplier_id\",\n",
    "    \"pv_ipv\", \"pv_uv\", \"cart_ipv\", \"cart_uv\", \"collect_uv\",\n",
    "    \"num_gmv\", \"amt_gmv\", \"qty_gmv\", \"unum_gmv\",\n",
    "    \"amt_alipay\", \"num_alipay\", \"qty_alipay\", \"unum_alipay\",\n",
    "    \"ztc_pv_ipv\", \"tbk_pv_ipv\", \"ss_pv_ipv\", \"jhs_pv_ipv\",\n",
    "    \"ztc_pv_uv\", \"tbk_pv_uv\", \"ss_pv_uv\", \"jhs_pv_uv\",\n",
    "    \"num_alipay_njhs\", \"amt_alipay_njhs\", \"qty_alipay_njhs\", \"unum_alipay_njhs\",\n",
    "]\n",
    "\n",
    "tp = pd.read_csv(\n",
    "    ITEM_FEATURE_CSV,\n",
    "    sep=\",\",\n",
    "    names=ITEM_FEATURE_COLUMNS,\n",
    "    parse_dates=[\"date\"],  # convert the date column to a datetime dtype\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>item_id</th>\n",
       "      <th>cate_id</th>\n",
       "      <th>cate_level_id</th>\n",
       "      <th>brand_id</th>\n",
       "      <th>supplier_id</th>\n",
       "      <th>pv_ipv</th>\n",
       "      <th>pv_uv</th>\n",
       "      <th>cart_ipv</th>\n",
       "      <th>cart_uv</th>\n",
       "      <th>...</th>\n",
       "      <th>ss_pv_ipv</th>\n",
       "      <th>jhs_pv_ipv</th>\n",
       "      <th>ztc_pv_uv</th>\n",
       "      <th>tbk_pv_uv</th>\n",
       "      <th>ss_pv_uv</th>\n",
       "      <th>jhs_pv_uv</th>\n",
       "      <th>num_alipay_njhs</th>\n",
       "      <th>amt_alipay_njhs</th>\n",
       "      <th>qty_alipay_njhs</th>\n",
       "      <th>unum_alipay_njhs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-11-07</td>\n",
       "      <td>100038</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>480</td>\n",
       "      <td>1162</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2015-08-09</td>\n",
       "      <td>100038</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>480</td>\n",
       "      <td>1162</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2015-11-10</td>\n",
       "      <td>100038</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>480</td>\n",
       "      <td>1162</td>\n",
       "      <td>19</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2015-09-25</td>\n",
       "      <td>100038</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>480</td>\n",
       "      <td>1162</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2015-07-26</td>\n",
       "      <td>100038</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>480</td>\n",
       "      <td>1162</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 31 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        date  item_id  cate_id  cate_level_id  brand_id  supplier_id  pv_ipv  \\\n",
       "0 2015-11-07   100038       37             11       480         1162       2   \n",
       "1 2015-08-09   100038       37             11       480         1162       3   \n",
       "2 2015-11-10   100038       37             11       480         1162      19   \n",
       "3 2015-09-25   100038       37             11       480         1162       4   \n",
       "4 2015-07-26   100038       37             11       480         1162       2   \n",
       "\n",
       "   pv_uv  cart_ipv  cart_uv  ...  ss_pv_ipv  jhs_pv_ipv  ztc_pv_uv  tbk_pv_uv  \\\n",
       "0      1         0        0  ...          0           0          0          0   \n",
       "1      1         0        0  ...          0           0          0          0   \n",
       "2      3         3        2  ...          0           0          0          0   \n",
       "3      2         0        0  ...          0           0          0          0   \n",
       "4      2         0        0  ...          2           0          0          0   \n",
       "\n",
       "   ss_pv_uv  jhs_pv_uv  num_alipay_njhs  amt_alipay_njhs  qty_alipay_njhs  \\\n",
       "0         0          0                0              0.0                0   \n",
       "1         0          0                0              0.0                0   \n",
       "2         0          0                0              0.0                0   \n",
       "3         0          0                0              0.0                0   \n",
       "4         2          0                0              0.0                0   \n",
       "\n",
       "   unum_alipay_njhs  \n",
       "0                 0  \n",
       "1                 0  \n",
       "2                 0  \n",
       "3                 0  \n",
       "4                 0  \n",
       "\n",
       "[5 rows x 31 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tp.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 特征工程\n",
    "\n",
    "* 商品种类多样\n",
    "* 商品会上架下架\n",
    "* 商品会做活动，因此销量有剧烈变化的情况\n",
    "* 同类商品有竞争"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 各个行为按周的统计特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weekly per-item statistics of every remaining (behaviour) column.\n",
    "# freq=\"W\" bins the dates into weeks labelled by their closing Sunday.\n",
    "weekly_item_keys = [\n",
    "    pd.Grouper(key=\"date\", freq=\"W\"),\n",
    "    \"item_id\",\n",
    "    \"cate_id\",\n",
    "    \"cate_level_id\",\n",
    "    \"brand_id\",\n",
    "    \"supplier_id\",\n",
    "]\n",
    "weekly_stats = [\"sum\", \"std\", \"max\", \"mean\"]\n",
    "\n",
    "feature_1 = tp.groupby(weekly_item_keys).agg(weekly_stats)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">pv_ipv</th>\n",
       "      <th colspan=\"4\" halign=\"left\">pv_uv</th>\n",
       "      <th colspan=\"2\" halign=\"left\">cart_ipv</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"2\" halign=\"left\">amt_alipay_njhs</th>\n",
       "      <th colspan=\"4\" halign=\"left\">qty_alipay_njhs</th>\n",
       "      <th colspan=\"4\" halign=\"left\">unum_alipay_njhs</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>sum</th>\n",
       "      <th>std</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>std</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>std</th>\n",
       "      <th>...</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>std</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>std</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th>item_id</th>\n",
       "      <th>cate_id</th>\n",
       "      <th>cate_level_id</th>\n",
       "      <th>brand_id</th>\n",
       "      <th>supplier_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2014-10-12</th>\n",
       "      <th>132</th>\n",
       "      <th>18</th>\n",
       "      <th>12</th>\n",
       "      <th>203</th>\n",
       "      <th>1976</th>\n",
       "      <td>5</td>\n",
       "      <td>1.154701</td>\n",
       "      <td>3</td>\n",
       "      <td>1.666667</td>\n",
       "      <td>5</td>\n",
       "      <td>1.154701</td>\n",
       "      <td>3</td>\n",
       "      <td>1.666667</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>330</th>\n",
       "      <th>20</th>\n",
       "      <th>5</th>\n",
       "      <th>400</th>\n",
       "      <th>1556</th>\n",
       "      <td>66</td>\n",
       "      <td>8.544004</td>\n",
       "      <td>31</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>37</td>\n",
       "      <td>3.214550</td>\n",
       "      <td>16</td>\n",
       "      <td>12.333333</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>903</th>\n",
       "      <th>17</th>\n",
       "      <th>12</th>\n",
       "      <th>666</th>\n",
       "      <th>431</th>\n",
       "      <td>2914</td>\n",
       "      <td>301.737192</td>\n",
       "      <td>1242</td>\n",
       "      <td>971.333333</td>\n",
       "      <td>1785</td>\n",
       "      <td>216.016203</td>\n",
       "      <td>792</td>\n",
       "      <td>595.000000</td>\n",
       "      <td>49</td>\n",
       "      <td>5.686241</td>\n",
       "      <td>...</td>\n",
       "      <td>10073.79</td>\n",
       "      <td>6613.046667</td>\n",
       "      <td>4</td>\n",
       "      <td>1.154701</td>\n",
       "      <td>2</td>\n",
       "      <td>1.333333</td>\n",
       "      <td>4</td>\n",
       "      <td>1.154701</td>\n",
       "      <td>2</td>\n",
       "      <td>1.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1013</th>\n",
       "      <th>39</th>\n",
       "      <th>12</th>\n",
       "      <th>406</th>\n",
       "      <th>1335</th>\n",
       "      <td>53</td>\n",
       "      <td>4.041452</td>\n",
       "      <td>22</td>\n",
       "      <td>17.666667</td>\n",
       "      <td>37</td>\n",
       "      <td>0.577350</td>\n",
       "      <td>13</td>\n",
       "      <td>12.333333</td>\n",
       "      <td>4</td>\n",
       "      <td>2.309401</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1053</th>\n",
       "      <th>37</th>\n",
       "      <th>11</th>\n",
       "      <th>480</th>\n",
       "      <th>673</th>\n",
       "      <td>285</td>\n",
       "      <td>19.078784</td>\n",
       "      <td>115</td>\n",
       "      <td>95.000000</td>\n",
       "      <td>148</td>\n",
       "      <td>11.846237</td>\n",
       "      <td>63</td>\n",
       "      <td>49.333333</td>\n",
       "      <td>27</td>\n",
       "      <td>6.082763</td>\n",
       "      <td>...</td>\n",
       "      <td>1807.06</td>\n",
       "      <td>602.353333</td>\n",
       "      <td>5</td>\n",
       "      <td>2.886751</td>\n",
       "      <td>5</td>\n",
       "      <td>1.666667</td>\n",
       "      <td>3</td>\n",
       "      <td>1.732051</td>\n",
       "      <td>3</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 100 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                              pv_ipv  \\\n",
       "                                                                 sum   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id          \n",
       "2014-10-12 132     18      12            203      1976             5   \n",
       "           330     20      5             400      1556            66   \n",
       "           903     17      12            666      431           2914   \n",
       "           1013    39      12            406      1335            53   \n",
       "           1053    37      11            480      673            285   \n",
       "\n",
       "                                                                           \\\n",
       "                                                                      std   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.154701   \n",
       "           330     20      5             400      1556           8.544004   \n",
       "           903     17      12            666      431          301.737192   \n",
       "           1013    39      12            406      1335           4.041452   \n",
       "           1053    37      11            480      673           19.078784   \n",
       "\n",
       "                                                                     \\\n",
       "                                                                max   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id         \n",
       "2014-10-12 132     18      12            203      1976            3   \n",
       "           330     20      5             400      1556           31   \n",
       "           903     17      12            666      431          1242   \n",
       "           1013    39      12            406      1335           22   \n",
       "           1053    37      11            480      673           115   \n",
       "\n",
       "                                                                           \\\n",
       "                                                                     mean   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.666667   \n",
       "           330     20      5             400      1556          22.000000   \n",
       "           903     17      12            666      431          971.333333   \n",
       "           1013    39      12            406      1335          17.666667   \n",
       "           1053    37      11            480      673           95.000000   \n",
       "\n",
       "                                                              pv_uv  \\\n",
       "                                                                sum   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id         \n",
       "2014-10-12 132     18      12            203      1976            5   \n",
       "           330     20      5             400      1556           37   \n",
       "           903     17      12            666      431          1785   \n",
       "           1013    39      12            406      1335           37   \n",
       "           1053    37      11            480      673           148   \n",
       "\n",
       "                                                                           \\\n",
       "                                                                      std   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.154701   \n",
       "           330     20      5             400      1556           3.214550   \n",
       "           903     17      12            666      431          216.016203   \n",
       "           1013    39      12            406      1335           0.577350   \n",
       "           1053    37      11            480      673           11.846237   \n",
       "\n",
       "                                                                    \\\n",
       "                                                               max   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id        \n",
       "2014-10-12 132     18      12            203      1976           3   \n",
       "           330     20      5             400      1556          16   \n",
       "           903     17      12            666      431          792   \n",
       "           1013    39      12            406      1335          13   \n",
       "           1053    37      11            480      673           63   \n",
       "\n",
       "                                                                           \\\n",
       "                                                                     mean   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.666667   \n",
       "           330     20      5             400      1556          12.333333   \n",
       "           903     17      12            666      431          595.000000   \n",
       "           1013    39      12            406      1335          12.333333   \n",
       "           1053    37      11            480      673           49.333333   \n",
       "\n",
       "                                                              cart_ipv  \\\n",
       "                                                                   sum   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id            \n",
       "2014-10-12 132     18      12            203      1976               0   \n",
       "           330     20      5             400      1556               0   \n",
       "           903     17      12            666      431               49   \n",
       "           1013    39      12            406      1335               4   \n",
       "           1053    37      11            480      673               27   \n",
       "\n",
       "                                                                         ...  \\\n",
       "                                                                    std  ...   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id            ...   \n",
       "2014-10-12 132     18      12            203      1976         0.000000  ...   \n",
       "           330     20      5             400      1556         0.000000  ...   \n",
       "           903     17      12            666      431          5.686241  ...   \n",
       "           1013    39      12            406      1335         2.309401  ...   \n",
       "           1053    37      11            480      673          6.082763  ...   \n",
       "\n",
       "                                                              amt_alipay_njhs  \\\n",
       "                                                                          max   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                   \n",
       "2014-10-12 132     18      12            203      1976                   0.00   \n",
       "           330     20      5             400      1556                   0.00   \n",
       "           903     17      12            666      431                10073.79   \n",
       "           1013    39      12            406      1335                   0.00   \n",
       "           1053    37      11            480      673                 1807.06   \n",
       "\n",
       "                                                                            \\\n",
       "                                                                      mean   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                \n",
       "2014-10-12 132     18      12            203      1976            0.000000   \n",
       "           330     20      5             400      1556            0.000000   \n",
       "           903     17      12            666      431          6613.046667   \n",
       "           1013    39      12            406      1335            0.000000   \n",
       "           1053    37      11            480      673           602.353333   \n",
       "\n",
       "                                                              qty_alipay_njhs  \\\n",
       "                                                                          sum   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                   \n",
       "2014-10-12 132     18      12            203      1976                      0   \n",
       "           330     20      5             400      1556                      0   \n",
       "           903     17      12            666      431                       4   \n",
       "           1013    39      12            406      1335                      0   \n",
       "           1053    37      11            480      673                       5   \n",
       "\n",
       "                                                                             \\\n",
       "                                                                    std max   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                 \n",
       "2014-10-12 132     18      12            203      1976         0.000000   0   \n",
       "           330     20      5             400      1556         0.000000   0   \n",
       "           903     17      12            666      431          1.154701   2   \n",
       "           1013    39      12            406      1335         0.000000   0   \n",
       "           1053    37      11            480      673          2.886751   5   \n",
       "\n",
       "                                                                         \\\n",
       "                                                                   mean   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id             \n",
       "2014-10-12 132     18      12            203      1976         0.000000   \n",
       "           330     20      5             400      1556         0.000000   \n",
       "           903     17      12            666      431          1.333333   \n",
       "           1013    39      12            406      1335         0.000000   \n",
       "           1053    37      11            480      673          1.666667   \n",
       "\n",
       "                                                              unum_alipay_njhs  \\\n",
       "                                                                           sum   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                    \n",
       "2014-10-12 132     18      12            203      1976                       0   \n",
       "           330     20      5             400      1556                       0   \n",
       "           903     17      12            666      431                        4   \n",
       "           1013    39      12            406      1335                       0   \n",
       "           1053    37      11            480      673                        3   \n",
       "\n",
       "                                                                             \\\n",
       "                                                                    std max   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                 \n",
       "2014-10-12 132     18      12            203      1976         0.000000   0   \n",
       "           330     20      5             400      1556         0.000000   0   \n",
       "           903     17      12            666      431          1.154701   2   \n",
       "           1013    39      12            406      1335         0.000000   0   \n",
       "           1053    37      11            480      673          1.732051   3   \n",
       "\n",
       "                                                                         \n",
       "                                                                   mean  \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id            \n",
       "2014-10-12 132     18      12            203      1976         0.000000  \n",
       "           330     20      5             400      1556         0.000000  \n",
       "           903     17      12            666      431          1.333333  \n",
       "           1013    39      12            406      1335         0.000000  \n",
       "           1053    37      11            480      673          1.000000  \n",
       "\n",
       "[5 rows x 100 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_1.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 各个行为在最近1天，2天，4天的总量（sum）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5\n",
      "6\n",
      "1\n",
      "4\n"
     ]
    }
   ],
   "source": [
    "# weekday() converts a date into its day-of-week index (Monday=0 ... Sunday=6).\n",
    "# Print it for the first four rows only; head(4) replaces a manual counter\n",
    "# and break, so no scratch variables are left behind in the kernel namespace.\n",
    "for day in tp.date.head(4):\n",
    "    print(day.weekday())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def c1(x):\n",
    "    \"\"\"Bucket a date by its position inside the week.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x : object exposing ``weekday()`` (Monday=0 ... Sunday=6),\n",
    "        e.g. a ``pandas.Timestamp``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        ``\"6\"`` when the index is 6 (Sunday), ``\"45\"`` when it is 4 or 5\n",
    "        (Friday/Saturday), otherwise ``\"0123\"`` (Monday to Thursday).\n",
    "    \"\"\"\n",
    "    day_index = x.weekday()\n",
    "    if day_index == 6:\n",
    "        return \"6\"\n",
    "    if day_index >= 4:\n",
    "        return \"45\"\n",
    "    return \"0123\"\n",
    "\n",
    "\n",
    "# Map every row's date to its bucket label and name the resulting Series.\n",
    "weekday0 = tp.date.apply(c1).rename(\"weekday0\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      45\n",
       "1       6\n",
       "2    0123\n",
       "3      45\n",
       "4       6\n",
       "Name: weekday0, dtype: object"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first bucket labels produced by c1.\n",
    "weekday0.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append the weekday0 labels to tp as an extra column (rows are matched on the index).\n",
    "tp1 = tp.join(weekday0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>item_id</th>\n",
       "      <th>cate_id</th>\n",
       "      <th>cate_level_id</th>\n",
       "      <th>brand_id</th>\n",
       "      <th>supplier_id</th>\n",
       "      <th>pv_ipv</th>\n",
       "      <th>pv_uv</th>\n",
       "      <th>cart_ipv</th>\n",
       "      <th>cart_uv</th>\n",
       "      <th>...</th>\n",
       "      <th>jhs_pv_ipv</th>\n",
       "      <th>ztc_pv_uv</th>\n",
       "      <th>tbk_pv_uv</th>\n",
       "      <th>ss_pv_uv</th>\n",
       "      <th>jhs_pv_uv</th>\n",
       "      <th>num_alipay_njhs</th>\n",
       "      <th>amt_alipay_njhs</th>\n",
       "      <th>qty_alipay_njhs</th>\n",
       "      <th>unum_alipay_njhs</th>\n",
       "      <th>weekday0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-11-07</td>\n",
       "      <td>100038</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>480</td>\n",
       "      <td>1162</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2015-08-09</td>\n",
       "      <td>100038</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>480</td>\n",
       "      <td>1162</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2015-11-10</td>\n",
       "      <td>100038</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>480</td>\n",
       "      <td>1162</td>\n",
       "      <td>19</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2015-09-25</td>\n",
       "      <td>100038</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>480</td>\n",
       "      <td>1162</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2015-07-26</td>\n",
       "      <td>100038</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>480</td>\n",
       "      <td>1162</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 32 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        date  item_id  cate_id  cate_level_id  brand_id  supplier_id  pv_ipv  \\\n",
       "0 2015-11-07   100038       37             11       480         1162       2   \n",
       "1 2015-08-09   100038       37             11       480         1162       3   \n",
       "2 2015-11-10   100038       37             11       480         1162      19   \n",
       "3 2015-09-25   100038       37             11       480         1162       4   \n",
       "4 2015-07-26   100038       37             11       480         1162       2   \n",
       "\n",
       "   pv_uv  cart_ipv  cart_uv    ...     jhs_pv_ipv  ztc_pv_uv  tbk_pv_uv  \\\n",
       "0      1         0        0    ...              0          0          0   \n",
       "1      1         0        0    ...              0          0          0   \n",
       "2      3         3        2    ...              0          0          0   \n",
       "3      2         0        0    ...              0          0          0   \n",
       "4      2         0        0    ...              0          0          0   \n",
       "\n",
       "   ss_pv_uv  jhs_pv_uv  num_alipay_njhs  amt_alipay_njhs  qty_alipay_njhs  \\\n",
       "0         0          0                0              0.0                0   \n",
       "1         0          0                0              0.0                0   \n",
       "2         0          0                0              0.0                0   \n",
       "3         0          0                0              0.0                0   \n",
       "4         2          0                0              0.0                0   \n",
       "\n",
       "   unum_alipay_njhs  weekday0  \n",
       "0                 0        45  \n",
       "1                 0         6  \n",
       "2                 0      0123  \n",
       "3                 0        45  \n",
       "4                 0         6  \n",
       "\n",
       "[5 rows x 32 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview tp1: the original columns plus the new weekday0 column.\n",
    "tp1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {},
   "outputs": [],
   "source": [
    "#tp1.groupby([pd.Grouper(key=\"date\",freq=\"W\"),\"item_id\",\"cate_id\",\"cate_level_id\",\"brand_id\",\"supplier_id\",\"weekday0\"]).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weekly totals of every behaviour column per item, split by weekday0 bucket:\n",
    "# the week's last day (Sunday), the 2 days before it (Friday/Saturday) and the other 4 days.\n",
    "# Each bucket becomes its own column level; missing combinations are filled with 0.\n",
    "feature_2 = (\n",
    "    tp1.groupby([pd.Grouper(key=\"date\", freq=\"W\"), \"item_id\", \"cate_id\", \"cate_level_id\", \"brand_id\", \"supplier_id\", \"weekday0\"])\n",
    "    .sum()\n",
    "    .fillna(0)\n",
    "    .unstack(level=\"weekday0\", fill_value=0)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">pv_ipv</th>\n",
       "      <th colspan=\"3\" halign=\"left\">pv_uv</th>\n",
       "      <th colspan=\"3\" halign=\"left\">cart_ipv</th>\n",
       "      <th>cart_uv</th>\n",
       "      <th>...</th>\n",
       "      <th>num_alipay_njhs</th>\n",
       "      <th colspan=\"3\" halign=\"left\">amt_alipay_njhs</th>\n",
       "      <th colspan=\"3\" halign=\"left\">qty_alipay_njhs</th>\n",
       "      <th colspan=\"3\" halign=\"left\">unum_alipay_njhs</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>weekday0</th>\n",
       "      <th>0123</th>\n",
       "      <th>45</th>\n",
       "      <th>6</th>\n",
       "      <th>0123</th>\n",
       "      <th>45</th>\n",
       "      <th>6</th>\n",
       "      <th>0123</th>\n",
       "      <th>45</th>\n",
       "      <th>6</th>\n",
       "      <th>0123</th>\n",
       "      <th>...</th>\n",
       "      <th>6</th>\n",
       "      <th>0123</th>\n",
       "      <th>45</th>\n",
       "      <th>6</th>\n",
       "      <th>0123</th>\n",
       "      <th>45</th>\n",
       "      <th>6</th>\n",
       "      <th>0123</th>\n",
       "      <th>45</th>\n",
       "      <th>6</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th>item_id</th>\n",
       "      <th>cate_id</th>\n",
       "      <th>cate_level_id</th>\n",
       "      <th>brand_id</th>\n",
       "      <th>supplier_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2014-10-12</th>\n",
       "      <th>132</th>\n",
       "      <th>18</th>\n",
       "      <th>12</th>\n",
       "      <th>203</th>\n",
       "      <th>1976</th>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>330</th>\n",
       "      <th>20</th>\n",
       "      <th>5</th>\n",
       "      <th>400</th>\n",
       "      <th>1556</th>\n",
       "      <td>0</td>\n",
       "      <td>52</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>27</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>903</th>\n",
       "      <th>17</th>\n",
       "      <th>12</th>\n",
       "      <th>666</th>\n",
       "      <th>431</th>\n",
       "      <td>0</td>\n",
       "      <td>1672</td>\n",
       "      <td>1242</td>\n",
       "      <td>0</td>\n",
       "      <td>993</td>\n",
       "      <td>792</td>\n",
       "      <td>0</td>\n",
       "      <td>28</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9765.35</td>\n",
       "      <td>10073.79</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1013</th>\n",
       "      <th>39</th>\n",
       "      <th>12</th>\n",
       "      <th>406</th>\n",
       "      <th>1335</th>\n",
       "      <td>0</td>\n",
       "      <td>31</td>\n",
       "      <td>22</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>12</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1053</th>\n",
       "      <th>37</th>\n",
       "      <th>11</th>\n",
       "      <th>480</th>\n",
       "      <th>673</th>\n",
       "      <td>0</td>\n",
       "      <td>170</td>\n",
       "      <td>115</td>\n",
       "      <td>0</td>\n",
       "      <td>85</td>\n",
       "      <td>63</td>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1807.06</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 75 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                              pv_ipv        \\\n",
       "weekday0                                                        0123    45   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                \n",
       "2014-10-12 132     18      12            203      1976             0     4   \n",
       "           330     20      5             400      1556             0    52   \n",
       "           903     17      12            666      431              0  1672   \n",
       "           1013    39      12            406      1335             0    31   \n",
       "           1053    37      11            480      673              0   170   \n",
       "\n",
       "                                                                    pv_uv  \\\n",
       "weekday0                                                          6  0123   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976            1     0   \n",
       "           330     20      5             400      1556           14     0   \n",
       "           903     17      12            666      431          1242     0   \n",
       "           1013    39      12            406      1335           22     0   \n",
       "           1053    37      11            480      673           115     0   \n",
       "\n",
       "                                                                         \\\n",
       "weekday0                                                        45    6   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id             \n",
       "2014-10-12 132     18      12            203      1976           4    1   \n",
       "           330     20      5             400      1556          27   10   \n",
       "           903     17      12            666      431          993  792   \n",
       "           1013    39      12            406      1335          25   12   \n",
       "           1053    37      11            480      673           85   63   \n",
       "\n",
       "                                                              cart_ipv      \\\n",
       "weekday0                                                          0123  45   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                \n",
       "2014-10-12 132     18      12            203      1976               0   0   \n",
       "           330     20      5             400      1556               0   0   \n",
       "           903     17      12            666      431                0  28   \n",
       "           1013    39      12            406      1335               0   4   \n",
       "           1053    37      11            480      673                0  21   \n",
       "\n",
       "                                                                  cart_uv ...  \\\n",
       "weekday0                                                        6    0123 ...   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id             ...   \n",
       "2014-10-12 132     18      12            203      1976          0       0 ...   \n",
       "           330     20      5             400      1556          0       0 ...   \n",
       "           903     17      12            666      431          21       0 ...   \n",
       "           1013    39      12            406      1335          0       0 ...   \n",
       "           1053    37      11            480      673           6       0 ...   \n",
       "\n",
       "                                                              num_alipay_njhs  \\\n",
       "weekday0                                                                    6   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                   \n",
       "2014-10-12 132     18      12            203      1976                      0   \n",
       "           330     20      5             400      1556                      0   \n",
       "           903     17      12            666      431                       2   \n",
       "           1013    39      12            406      1335                      0   \n",
       "           1053    37      11            480      673                       0   \n",
       "\n",
       "                                                              amt_alipay_njhs  \\\n",
       "weekday0                                                                 0123   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                   \n",
       "2014-10-12 132     18      12            203      1976                    0.0   \n",
       "           330     20      5             400      1556                    0.0   \n",
       "           903     17      12            666      431                     0.0   \n",
       "           1013    39      12            406      1335                    0.0   \n",
       "           1053    37      11            480      673                     0.0   \n",
       "\n",
       "                                                                        \\\n",
       "weekday0                                                            45   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id            \n",
       "2014-10-12 132     18      12            203      1976            0.00   \n",
       "           330     20      5             400      1556            0.00   \n",
       "           903     17      12            666      431          9765.35   \n",
       "           1013    39      12            406      1335            0.00   \n",
       "           1053    37      11            480      673          1807.06   \n",
       "\n",
       "                                                                         \\\n",
       "weekday0                                                              6   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id             \n",
       "2014-10-12 132     18      12            203      1976             0.00   \n",
       "           330     20      5             400      1556             0.00   \n",
       "           903     17      12            666      431          10073.79   \n",
       "           1013    39      12            406      1335             0.00   \n",
       "           1053    37      11            480      673              0.00   \n",
       "\n",
       "                                                              qty_alipay_njhs  \\\n",
       "weekday0                                                                 0123   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                   \n",
       "2014-10-12 132     18      12            203      1976                      0   \n",
       "           330     20      5             400      1556                      0   \n",
       "           903     17      12            666      431                       0   \n",
       "           1013    39      12            406      1335                      0   \n",
       "           1053    37      11            480      673                       0   \n",
       "\n",
       "                                                                     \\\n",
       "weekday0                                                      45  6   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id         \n",
       "2014-10-12 132     18      12            203      1976         0  0   \n",
       "           330     20      5             400      1556         0  0   \n",
       "           903     17      12            666      431          2  2   \n",
       "           1013    39      12            406      1335         0  0   \n",
       "           1053    37      11            480      673          5  0   \n",
       "\n",
       "                                                              unum_alipay_njhs  \\\n",
       "weekday0                                                                  0123   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                    \n",
       "2014-10-12 132     18      12            203      1976                       0   \n",
       "           330     20      5             400      1556                       0   \n",
       "           903     17      12            666      431                        0   \n",
       "           1013    39      12            406      1335                       0   \n",
       "           1053    37      11            480      673                        0   \n",
       "\n",
       "                                                                     \n",
       "weekday0                                                      45  6  \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id        \n",
       "2014-10-12 132     18      12            203      1976         0  0  \n",
       "           330     20      5             400      1556         0  0  \n",
       "           903     17      12            666      431          2  2  \n",
       "           1013    39      12            406      1335         0  0  \n",
       "           1053    37      11            480      673          3  0  \n",
       "\n",
       "[5 rows x 75 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview feature_2: one column per (behaviour, weekday0 bucket) pair.\n",
    "feature_2.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 各个类目（cate_id）与品牌(brand_id)的周度总销量\n",
    "\n",
    "* 为什么不做独热特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weekly total of qty_alipay_njhs per category (cate_id) and per brand (brand_id).\n",
    "feature_cate_id = tp.groupby([pd.Grouper(key=\"date\", freq=\"W\"), \"cate_id\"])[[\"qty_alipay_njhs\"]].sum()\n",
    "feature_brand_id = tp.groupby([pd.Grouper(key=\"date\", freq=\"W\"), \"brand_id\"])[[\"qty_alipay_njhs\"]].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>qty_alipay_njhs</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th>brand_id</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2014-10-12</th>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     qty_alipay_njhs\n",
       "date       brand_id                 \n",
       "2014-10-12 2                       2\n",
       "           3                       3\n",
       "           6                       0\n",
       "           26                      6\n",
       "           35                     45"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the weekly per-brand totals (swap in the commented line to inspect categories instead).\n",
    "#feature_cate_id.head()\n",
    "feature_brand_id.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 三方面特征合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per the original note, feature_1 has the most complete index, so feature_2 is aligned\n",
    "# to it before the column-wise concat. (The `join_axes` argument of pd.concat was removed\n",
    "# in pandas 1.0; reindexing the input is the documented replacement.)\n",
    "feature_12 = pd.concat([feature_1, feature_2.reindex(feature_1.index)], axis=1)\n",
    "# Any remaining NaN (e.g. index entries missing from feature_2) becomes 0.\n",
    "feature_12 = feature_12.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">pv_ipv</th>\n",
       "      <th colspan=\"4\" halign=\"left\">pv_uv</th>\n",
       "      <th colspan=\"2\" halign=\"left\">cart_ipv</th>\n",
       "      <th>...</th>\n",
       "      <th>num_alipay_njhs</th>\n",
       "      <th colspan=\"3\" halign=\"left\">amt_alipay_njhs</th>\n",
       "      <th colspan=\"3\" halign=\"left\">qty_alipay_njhs</th>\n",
       "      <th colspan=\"3\" halign=\"left\">unum_alipay_njhs</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>sum</th>\n",
       "      <th>std</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>std</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>std</th>\n",
       "      <th>...</th>\n",
       "      <th>6</th>\n",
       "      <th>0123</th>\n",
       "      <th>45</th>\n",
       "      <th>6</th>\n",
       "      <th>0123</th>\n",
       "      <th>45</th>\n",
       "      <th>6</th>\n",
       "      <th>0123</th>\n",
       "      <th>45</th>\n",
       "      <th>6</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th>item_id</th>\n",
       "      <th>cate_id</th>\n",
       "      <th>cate_level_id</th>\n",
       "      <th>brand_id</th>\n",
       "      <th>supplier_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2014-10-12</th>\n",
       "      <th>132</th>\n",
       "      <th>18</th>\n",
       "      <th>12</th>\n",
       "      <th>203</th>\n",
       "      <th>1976</th>\n",
       "      <td>5</td>\n",
       "      <td>1.154701</td>\n",
       "      <td>3</td>\n",
       "      <td>1.666667</td>\n",
       "      <td>5</td>\n",
       "      <td>1.154701</td>\n",
       "      <td>3</td>\n",
       "      <td>1.666667</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>330</th>\n",
       "      <th>20</th>\n",
       "      <th>5</th>\n",
       "      <th>400</th>\n",
       "      <th>1556</th>\n",
       "      <td>66</td>\n",
       "      <td>8.544004</td>\n",
       "      <td>31</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>37</td>\n",
       "      <td>3.214550</td>\n",
       "      <td>16</td>\n",
       "      <td>12.333333</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>903</th>\n",
       "      <th>17</th>\n",
       "      <th>12</th>\n",
       "      <th>666</th>\n",
       "      <th>431</th>\n",
       "      <td>2914</td>\n",
       "      <td>301.737192</td>\n",
       "      <td>1242</td>\n",
       "      <td>971.333333</td>\n",
       "      <td>1785</td>\n",
       "      <td>216.016203</td>\n",
       "      <td>792</td>\n",
       "      <td>595.000000</td>\n",
       "      <td>49</td>\n",
       "      <td>5.686241</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9765.35</td>\n",
       "      <td>10073.79</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1013</th>\n",
       "      <th>39</th>\n",
       "      <th>12</th>\n",
       "      <th>406</th>\n",
       "      <th>1335</th>\n",
       "      <td>53</td>\n",
       "      <td>4.041452</td>\n",
       "      <td>22</td>\n",
       "      <td>17.666667</td>\n",
       "      <td>37</td>\n",
       "      <td>0.577350</td>\n",
       "      <td>13</td>\n",
       "      <td>12.333333</td>\n",
       "      <td>4</td>\n",
       "      <td>2.309401</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1053</th>\n",
       "      <th>37</th>\n",
       "      <th>11</th>\n",
       "      <th>480</th>\n",
       "      <th>673</th>\n",
       "      <td>285</td>\n",
       "      <td>19.078784</td>\n",
       "      <td>115</td>\n",
       "      <td>95.000000</td>\n",
       "      <td>148</td>\n",
       "      <td>11.846237</td>\n",
       "      <td>63</td>\n",
       "      <td>49.333333</td>\n",
       "      <td>27</td>\n",
       "      <td>6.082763</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1807.06</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 175 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                              pv_ipv  \\\n",
       "                                                                 sum   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id          \n",
       "2014-10-12 132     18      12            203      1976             5   \n",
       "           330     20      5             400      1556            66   \n",
       "           903     17      12            666      431           2914   \n",
       "           1013    39      12            406      1335            53   \n",
       "           1053    37      11            480      673            285   \n",
       "\n",
       "                                                                           \\\n",
       "                                                                      std   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.154701   \n",
       "           330     20      5             400      1556           8.544004   \n",
       "           903     17      12            666      431          301.737192   \n",
       "           1013    39      12            406      1335           4.041452   \n",
       "           1053    37      11            480      673           19.078784   \n",
       "\n",
       "                                                                     \\\n",
       "                                                                max   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id         \n",
       "2014-10-12 132     18      12            203      1976            3   \n",
       "           330     20      5             400      1556           31   \n",
       "           903     17      12            666      431          1242   \n",
       "           1013    39      12            406      1335           22   \n",
       "           1053    37      11            480      673           115   \n",
       "\n",
       "                                                                           \\\n",
       "                                                                     mean   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.666667   \n",
       "           330     20      5             400      1556          22.000000   \n",
       "           903     17      12            666      431          971.333333   \n",
       "           1013    39      12            406      1335          17.666667   \n",
       "           1053    37      11            480      673           95.000000   \n",
       "\n",
       "                                                              pv_uv  \\\n",
       "                                                                sum   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id         \n",
       "2014-10-12 132     18      12            203      1976            5   \n",
       "           330     20      5             400      1556           37   \n",
       "           903     17      12            666      431          1785   \n",
       "           1013    39      12            406      1335           37   \n",
       "           1053    37      11            480      673           148   \n",
       "\n",
       "                                                                           \\\n",
       "                                                                      std   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.154701   \n",
       "           330     20      5             400      1556           3.214550   \n",
       "           903     17      12            666      431          216.016203   \n",
       "           1013    39      12            406      1335           0.577350   \n",
       "           1053    37      11            480      673           11.846237   \n",
       "\n",
       "                                                                    \\\n",
       "                                                               max   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id        \n",
       "2014-10-12 132     18      12            203      1976           3   \n",
       "           330     20      5             400      1556          16   \n",
       "           903     17      12            666      431          792   \n",
       "           1013    39      12            406      1335          13   \n",
       "           1053    37      11            480      673           63   \n",
       "\n",
       "                                                                           \\\n",
       "                                                                     mean   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.666667   \n",
       "           330     20      5             400      1556          12.333333   \n",
       "           903     17      12            666      431          595.000000   \n",
       "           1013    39      12            406      1335          12.333333   \n",
       "           1053    37      11            480      673           49.333333   \n",
       "\n",
       "                                                              cart_ipv  \\\n",
       "                                                                   sum   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id            \n",
       "2014-10-12 132     18      12            203      1976               0   \n",
       "           330     20      5             400      1556               0   \n",
       "           903     17      12            666      431               49   \n",
       "           1013    39      12            406      1335               4   \n",
       "           1053    37      11            480      673               27   \n",
       "\n",
       "                                                                        ...  \\\n",
       "                                                                    std ...   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id           ...   \n",
       "2014-10-12 132     18      12            203      1976         0.000000 ...   \n",
       "           330     20      5             400      1556         0.000000 ...   \n",
       "           903     17      12            666      431          5.686241 ...   \n",
       "           1013    39      12            406      1335         2.309401 ...   \n",
       "           1053    37      11            480      673          6.082763 ...   \n",
       "\n",
       "                                                              num_alipay_njhs  \\\n",
       "                                                                            6   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                   \n",
       "2014-10-12 132     18      12            203      1976                      0   \n",
       "           330     20      5             400      1556                      0   \n",
       "           903     17      12            666      431                       2   \n",
       "           1013    39      12            406      1335                      0   \n",
       "           1053    37      11            480      673                       0   \n",
       "\n",
       "                                                              amt_alipay_njhs  \\\n",
       "                                                                         0123   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                   \n",
       "2014-10-12 132     18      12            203      1976                    0.0   \n",
       "           330     20      5             400      1556                    0.0   \n",
       "           903     17      12            666      431                     0.0   \n",
       "           1013    39      12            406      1335                    0.0   \n",
       "           1053    37      11            480      673                     0.0   \n",
       "\n",
       "                                                                        \\\n",
       "                                                                    45   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id            \n",
       "2014-10-12 132     18      12            203      1976            0.00   \n",
       "           330     20      5             400      1556            0.00   \n",
       "           903     17      12            666      431          9765.35   \n",
       "           1013    39      12            406      1335            0.00   \n",
       "           1053    37      11            480      673          1807.06   \n",
       "\n",
       "                                                                         \\\n",
       "                                                                      6   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id             \n",
       "2014-10-12 132     18      12            203      1976             0.00   \n",
       "           330     20      5             400      1556             0.00   \n",
       "           903     17      12            666      431          10073.79   \n",
       "           1013    39      12            406      1335             0.00   \n",
       "           1053    37      11            480      673              0.00   \n",
       "\n",
       "                                                              qty_alipay_njhs  \\\n",
       "                                                                         0123   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                   \n",
       "2014-10-12 132     18      12            203      1976                      0   \n",
       "           330     20      5             400      1556                      0   \n",
       "           903     17      12            666      431                       0   \n",
       "           1013    39      12            406      1335                      0   \n",
       "           1053    37      11            480      673                       0   \n",
       "\n",
       "                                                                     \\\n",
       "                                                              45  6   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id         \n",
       "2014-10-12 132     18      12            203      1976         0  0   \n",
       "           330     20      5             400      1556         0  0   \n",
       "           903     17      12            666      431          2  2   \n",
       "           1013    39      12            406      1335         0  0   \n",
       "           1053    37      11            480      673          5  0   \n",
       "\n",
       "                                                              unum_alipay_njhs  \\\n",
       "                                                                          0123   \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                    \n",
       "2014-10-12 132     18      12            203      1976                       0   \n",
       "           330     20      5             400      1556                       0   \n",
       "           903     17      12            666      431                        0   \n",
       "           1013    39      12            406      1335                       0   \n",
       "           1053    37      11            480      673                        0   \n",
       "\n",
       "                                                                     \n",
       "                                                              45  6  \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id        \n",
       "2014-10-12 132     18      12            203      1976         0  0  \n",
       "           330     20      5             400      1556         0  0  \n",
       "           903     17      12            666      431          2  2  \n",
       "           1013    39      12            406      1335         0  0  \n",
       "           1053    37      11            480      673          3  0  \n",
       "\n",
       "[5 rows x 175 columns]"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_12.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "#feature_12.columns.ravel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "feature_12_0 = feature_12.copy(deep=True)#深复制\n",
    "feature_12_0.columns = [\"_\".join(x) for x in feature_12.columns.ravel()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>pv_ipv_sum</th>\n",
       "      <th>pv_ipv_std</th>\n",
       "      <th>pv_ipv_max</th>\n",
       "      <th>pv_ipv_mean</th>\n",
       "      <th>pv_uv_sum</th>\n",
       "      <th>pv_uv_std</th>\n",
       "      <th>pv_uv_max</th>\n",
       "      <th>pv_uv_mean</th>\n",
       "      <th>cart_ipv_sum</th>\n",
       "      <th>cart_ipv_std</th>\n",
       "      <th>...</th>\n",
       "      <th>num_alipay_njhs_6</th>\n",
       "      <th>amt_alipay_njhs_0123</th>\n",
       "      <th>amt_alipay_njhs_45</th>\n",
       "      <th>amt_alipay_njhs_6</th>\n",
       "      <th>qty_alipay_njhs_0123</th>\n",
       "      <th>qty_alipay_njhs_45</th>\n",
       "      <th>qty_alipay_njhs_6</th>\n",
       "      <th>unum_alipay_njhs_0123</th>\n",
       "      <th>unum_alipay_njhs_45</th>\n",
       "      <th>unum_alipay_njhs_6</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th>item_id</th>\n",
       "      <th>cate_id</th>\n",
       "      <th>cate_level_id</th>\n",
       "      <th>brand_id</th>\n",
       "      <th>supplier_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2014-10-12</th>\n",
       "      <th>132</th>\n",
       "      <th>18</th>\n",
       "      <th>12</th>\n",
       "      <th>203</th>\n",
       "      <th>1976</th>\n",
       "      <td>5</td>\n",
       "      <td>1.154701</td>\n",
       "      <td>3</td>\n",
       "      <td>1.666667</td>\n",
       "      <td>5</td>\n",
       "      <td>1.154701</td>\n",
       "      <td>3</td>\n",
       "      <td>1.666667</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>330</th>\n",
       "      <th>20</th>\n",
       "      <th>5</th>\n",
       "      <th>400</th>\n",
       "      <th>1556</th>\n",
       "      <td>66</td>\n",
       "      <td>8.544004</td>\n",
       "      <td>31</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>37</td>\n",
       "      <td>3.214550</td>\n",
       "      <td>16</td>\n",
       "      <td>12.333333</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>903</th>\n",
       "      <th>17</th>\n",
       "      <th>12</th>\n",
       "      <th>666</th>\n",
       "      <th>431</th>\n",
       "      <td>2914</td>\n",
       "      <td>301.737192</td>\n",
       "      <td>1242</td>\n",
       "      <td>971.333333</td>\n",
       "      <td>1785</td>\n",
       "      <td>216.016203</td>\n",
       "      <td>792</td>\n",
       "      <td>595.000000</td>\n",
       "      <td>49</td>\n",
       "      <td>5.686241</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9765.35</td>\n",
       "      <td>10073.79</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1013</th>\n",
       "      <th>39</th>\n",
       "      <th>12</th>\n",
       "      <th>406</th>\n",
       "      <th>1335</th>\n",
       "      <td>53</td>\n",
       "      <td>4.041452</td>\n",
       "      <td>22</td>\n",
       "      <td>17.666667</td>\n",
       "      <td>37</td>\n",
       "      <td>0.577350</td>\n",
       "      <td>13</td>\n",
       "      <td>12.333333</td>\n",
       "      <td>4</td>\n",
       "      <td>2.309401</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1053</th>\n",
       "      <th>37</th>\n",
       "      <th>11</th>\n",
       "      <th>480</th>\n",
       "      <th>673</th>\n",
       "      <td>285</td>\n",
       "      <td>19.078784</td>\n",
       "      <td>115</td>\n",
       "      <td>95.000000</td>\n",
       "      <td>148</td>\n",
       "      <td>11.846237</td>\n",
       "      <td>63</td>\n",
       "      <td>49.333333</td>\n",
       "      <td>27</td>\n",
       "      <td>6.082763</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1807.06</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 175 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                               pv_ipv_sum  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976                  5   \n",
       "           330     20      5             400      1556                 66   \n",
       "           903     17      12            666      431                2914   \n",
       "           1013    39      12            406      1335                 53   \n",
       "           1053    37      11            480      673                 285   \n",
       "\n",
       "                                                               pv_ipv_std  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.154701   \n",
       "           330     20      5             400      1556           8.544004   \n",
       "           903     17      12            666      431          301.737192   \n",
       "           1013    39      12            406      1335           4.041452   \n",
       "           1053    37      11            480      673           19.078784   \n",
       "\n",
       "                                                               pv_ipv_max  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976                  3   \n",
       "           330     20      5             400      1556                 31   \n",
       "           903     17      12            666      431                1242   \n",
       "           1013    39      12            406      1335                 22   \n",
       "           1053    37      11            480      673                 115   \n",
       "\n",
       "                                                               pv_ipv_mean  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                \n",
       "2014-10-12 132     18      12            203      1976            1.666667   \n",
       "           330     20      5             400      1556           22.000000   \n",
       "           903     17      12            666      431           971.333333   \n",
       "           1013    39      12            406      1335           17.666667   \n",
       "           1053    37      11            480      673            95.000000   \n",
       "\n",
       "                                                               pv_uv_sum  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id              \n",
       "2014-10-12 132     18      12            203      1976                 5   \n",
       "           330     20      5             400      1556                37   \n",
       "           903     17      12            666      431               1785   \n",
       "           1013    39      12            406      1335                37   \n",
       "           1053    37      11            480      673                148   \n",
       "\n",
       "                                                                pv_uv_std  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.154701   \n",
       "           330     20      5             400      1556           3.214550   \n",
       "           903     17      12            666      431          216.016203   \n",
       "           1013    39      12            406      1335           0.577350   \n",
       "           1053    37      11            480      673           11.846237   \n",
       "\n",
       "                                                               pv_uv_max  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id              \n",
       "2014-10-12 132     18      12            203      1976                 3   \n",
       "           330     20      5             400      1556                16   \n",
       "           903     17      12            666      431                792   \n",
       "           1013    39      12            406      1335                13   \n",
       "           1053    37      11            480      673                 63   \n",
       "\n",
       "                                                               pv_uv_mean  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id               \n",
       "2014-10-12 132     18      12            203      1976           1.666667   \n",
       "           330     20      5             400      1556          12.333333   \n",
       "           903     17      12            666      431          595.000000   \n",
       "           1013    39      12            406      1335          12.333333   \n",
       "           1053    37      11            480      673           49.333333   \n",
       "\n",
       "                                                               cart_ipv_sum  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                 \n",
       "2014-10-12 132     18      12            203      1976                    0   \n",
       "           330     20      5             400      1556                    0   \n",
       "           903     17      12            666      431                    49   \n",
       "           1013    39      12            406      1335                    4   \n",
       "           1053    37      11            480      673                    27   \n",
       "\n",
       "                                                               cart_ipv_std  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                 \n",
       "2014-10-12 132     18      12            203      1976             0.000000   \n",
       "           330     20      5             400      1556             0.000000   \n",
       "           903     17      12            666      431              5.686241   \n",
       "           1013    39      12            406      1335             2.309401   \n",
       "           1053    37      11            480      673              6.082763   \n",
       "\n",
       "                                                                      ...          \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id         ...           \n",
       "2014-10-12 132     18      12            203      1976                ...           \n",
       "           330     20      5             400      1556                ...           \n",
       "           903     17      12            666      431                 ...           \n",
       "           1013    39      12            406      1335                ...           \n",
       "           1053    37      11            480      673                 ...           \n",
       "\n",
       "                                                               num_alipay_njhs_6  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                      \n",
       "2014-10-12 132     18      12            203      1976                         0   \n",
       "           330     20      5             400      1556                         0   \n",
       "           903     17      12            666      431                          2   \n",
       "           1013    39      12            406      1335                         0   \n",
       "           1053    37      11            480      673                          0   \n",
       "\n",
       "                                                               amt_alipay_njhs_0123  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                         \n",
       "2014-10-12 132     18      12            203      1976                          0.0   \n",
       "           330     20      5             400      1556                          0.0   \n",
       "           903     17      12            666      431                           0.0   \n",
       "           1013    39      12            406      1335                          0.0   \n",
       "           1053    37      11            480      673                           0.0   \n",
       "\n",
       "                                                               amt_alipay_njhs_45  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                       \n",
       "2014-10-12 132     18      12            203      1976                       0.00   \n",
       "           330     20      5             400      1556                       0.00   \n",
       "           903     17      12            666      431                     9765.35   \n",
       "           1013    39      12            406      1335                       0.00   \n",
       "           1053    37      11            480      673                     1807.06   \n",
       "\n",
       "                                                               amt_alipay_njhs_6  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                      \n",
       "2014-10-12 132     18      12            203      1976                      0.00   \n",
       "           330     20      5             400      1556                      0.00   \n",
       "           903     17      12            666      431                   10073.79   \n",
       "           1013    39      12            406      1335                      0.00   \n",
       "           1053    37      11            480      673                       0.00   \n",
       "\n",
       "                                                               qty_alipay_njhs_0123  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                         \n",
       "2014-10-12 132     18      12            203      1976                            0   \n",
       "           330     20      5             400      1556                            0   \n",
       "           903     17      12            666      431                             0   \n",
       "           1013    39      12            406      1335                            0   \n",
       "           1053    37      11            480      673                             0   \n",
       "\n",
       "                                                               qty_alipay_njhs_45  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                       \n",
       "2014-10-12 132     18      12            203      1976                          0   \n",
       "           330     20      5             400      1556                          0   \n",
       "           903     17      12            666      431                           2   \n",
       "           1013    39      12            406      1335                          0   \n",
       "           1053    37      11            480      673                           5   \n",
       "\n",
       "                                                               qty_alipay_njhs_6  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                      \n",
       "2014-10-12 132     18      12            203      1976                         0   \n",
       "           330     20      5             400      1556                         0   \n",
       "           903     17      12            666      431                          2   \n",
       "           1013    39      12            406      1335                         0   \n",
       "           1053    37      11            480      673                          0   \n",
       "\n",
       "                                                               unum_alipay_njhs_0123  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                          \n",
       "2014-10-12 132     18      12            203      1976                             0   \n",
       "           330     20      5             400      1556                             0   \n",
       "           903     17      12            666      431                              0   \n",
       "           1013    39      12            406      1335                             0   \n",
       "           1053    37      11            480      673                              0   \n",
       "\n",
       "                                                               unum_alipay_njhs_45  \\\n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                        \n",
       "2014-10-12 132     18      12            203      1976                           0   \n",
       "           330     20      5             400      1556                           0   \n",
       "           903     17      12            666      431                            2   \n",
       "           1013    39      12            406      1335                           0   \n",
       "           1053    37      11            480      673                            3   \n",
       "\n",
       "                                                               unum_alipay_njhs_6  \n",
       "date       item_id cate_id cate_level_id brand_id supplier_id                      \n",
       "2014-10-12 132     18      12            203      1976                          0  \n",
       "           330     20      5             400      1556                          0  \n",
       "           903     17      12            666      431                           2  \n",
       "           1013    39      12            406      1335                          0  \n",
       "           1053    37      11            480      673                           0  \n",
       "\n",
       "[5 rows x 175 columns]"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_12_0.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Move the index levels (including \"date\") back into ordinary columns so the\n",
    "# three tables can be merged on shared key columns in the next step.\n",
    "# Guarded so the cell is idempotent: an unguarded second reset_index() would\n",
    "# insert a spurious \"index\" column and change the downstream results.\n",
    "for _frame in (feature_12_0, feature_cate_id, feature_brand_id):\n",
    "    if \"date\" not in _frame.columns:\n",
    "        _frame.reset_index(inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>brand_id</th>\n",
       "      <th>qty_alipay_njhs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2014-10-12</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2014-10-12</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2014-10-12</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2014-10-12</td>\n",
       "      <td>26</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-10-12</td>\n",
       "      <td>35</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        date  brand_id  qty_alipay_njhs\n",
       "0 2014-10-12         2                2\n",
       "1 2014-10-12         3                3\n",
       "2 2014-10-12         6                0\n",
       "3 2014-10-12        26                6\n",
       "4 2014-10-12        35               45"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the brand-level table after the index reset; the commented-out line\n",
    "# below previews the category-level table instead.\n",
    "#feature_cate_id.head()\n",
    "feature_brand_id.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the category-level and brand-level tables to every item row with two\n",
    "# chained merges (pandas default: inner join on the listed keys).\n",
    "# As the merged preview shows, both side tables contribute a \"qty_alipay_njhs\"\n",
    "# column, so the second merge renames them with the default suffixes:\n",
    "# \"_x\" (category-level value) and \"_y\" (brand-level value).\n",
    "feature_123=(\n",
    "    feature_12_0\n",
    "    .merge(feature_cate_id,on=[\"date\",\"cate_id\"])\n",
    "    .merge(feature_brand_id,on=[\"date\",\"brand_id\"])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>pv_ipv_sum</th>\n",
       "      <th>pv_ipv_std</th>\n",
       "      <th>pv_ipv_max</th>\n",
       "      <th>pv_ipv_mean</th>\n",
       "      <th>pv_uv_sum</th>\n",
       "      <th>pv_uv_std</th>\n",
       "      <th>pv_uv_max</th>\n",
       "      <th>pv_uv_mean</th>\n",
       "      <th>cart_ipv_sum</th>\n",
       "      <th>cart_ipv_std</th>\n",
       "      <th>...</th>\n",
       "      <th>amt_alipay_njhs_45</th>\n",
       "      <th>amt_alipay_njhs_6</th>\n",
       "      <th>qty_alipay_njhs_0123</th>\n",
       "      <th>qty_alipay_njhs_45</th>\n",
       "      <th>qty_alipay_njhs_6</th>\n",
       "      <th>unum_alipay_njhs_0123</th>\n",
       "      <th>unum_alipay_njhs_45</th>\n",
       "      <th>unum_alipay_njhs_6</th>\n",
       "      <th>qty_alipay_njhs_x</th>\n",
       "      <th>qty_alipay_njhs_y</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th>item_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2014-10-12</th>\n",
       "      <th>132</th>\n",
       "      <td>5</td>\n",
       "      <td>1.154701</td>\n",
       "      <td>3</td>\n",
       "      <td>1.666667</td>\n",
       "      <td>5</td>\n",
       "      <td>1.154701</td>\n",
       "      <td>3</td>\n",
       "      <td>1.666667</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>55</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17097</th>\n",
       "      <td>1217</td>\n",
       "      <td>131.062326</td>\n",
       "      <td>557</td>\n",
       "      <td>405.666667</td>\n",
       "      <td>571</td>\n",
       "      <td>66.425397</td>\n",
       "      <td>267</td>\n",
       "      <td>190.333333</td>\n",
       "      <td>50</td>\n",
       "      <td>4.932883</td>\n",
       "      <td>...</td>\n",
       "      <td>3332.49</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50742</th>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74664</th>\n",
       "      <td>3</td>\n",
       "      <td>0.707107</td>\n",
       "      <td>2</td>\n",
       "      <td>1.500000</td>\n",
       "      <td>3</td>\n",
       "      <td>0.707107</td>\n",
       "      <td>2</td>\n",
       "      <td>1.500000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139306</th>\n",
       "      <td>64</td>\n",
       "      <td>5.507571</td>\n",
       "      <td>25</td>\n",
       "      <td>21.333333</td>\n",
       "      <td>30</td>\n",
       "      <td>3.464102</td>\n",
       "      <td>14</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 177 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                    pv_ipv_sum  pv_ipv_std  pv_ipv_max  pv_ipv_mean  \\\n",
       "date       item_id                                                    \n",
       "2014-10-12 132               5    1.154701           3     1.666667   \n",
       "           17097          1217  131.062326         557   405.666667   \n",
       "           50742             1    0.000000           1     1.000000   \n",
       "           74664             3    0.707107           2     1.500000   \n",
       "           139306           64    5.507571          25    21.333333   \n",
       "\n",
       "                    pv_uv_sum  pv_uv_std  pv_uv_max  pv_uv_mean  cart_ipv_sum  \\\n",
       "date       item_id                                                              \n",
       "2014-10-12 132              5   1.154701          3    1.666667             0   \n",
       "           17097          571  66.425397        267  190.333333            50   \n",
       "           50742            1   0.000000          1    1.000000             0   \n",
       "           74664            3   0.707107          2    1.500000             0   \n",
       "           139306          30   3.464102         14   10.000000             0   \n",
       "\n",
       "                    cart_ipv_std        ...          amt_alipay_njhs_45  \\\n",
       "date       item_id                      ...                               \n",
       "2014-10-12 132          0.000000        ...                        0.00   \n",
       "           17097        4.932883        ...                     3332.49   \n",
       "           50742        0.000000        ...                        0.00   \n",
       "           74664        0.000000        ...                        0.00   \n",
       "           139306       0.000000        ...                        0.00   \n",
       "\n",
       "                    amt_alipay_njhs_6  qty_alipay_njhs_0123  \\\n",
       "date       item_id                                            \n",
       "2014-10-12 132                    0.0                     0   \n",
       "           17097                  0.0                     0   \n",
       "           50742                  0.0                     0   \n",
       "           74664                  0.0                     0   \n",
       "           139306                 0.0                     0   \n",
       "\n",
       "                    qty_alipay_njhs_45  qty_alipay_njhs_6  \\\n",
       "date       item_id                                          \n",
       "2014-10-12 132                       0                  0   \n",
       "           17097                     1                  0   \n",
       "           50742                     0                  0   \n",
       "           74664                     0                  0   \n",
       "           139306                    0                  0   \n",
       "\n",
       "                    unum_alipay_njhs_0123  unum_alipay_njhs_45  \\\n",
       "date       item_id                                               \n",
       "2014-10-12 132                          0                    0   \n",
       "           17097                        0                    1   \n",
       "           50742                        0                    0   \n",
       "           74664                        0                    0   \n",
       "           139306                       0                    0   \n",
       "\n",
       "                    unum_alipay_njhs_6  qty_alipay_njhs_x  qty_alipay_njhs_y  \n",
       "date       item_id                                                            \n",
       "2014-10-12 132                       0                 55                 27  \n",
       "           17097                     0                 24                 27  \n",
       "           50742                     0                 24                 27  \n",
       "           74664                     0                 24                 27  \n",
       "           139306                    0                 24                 27  \n",
       "\n",
       "[5 rows x 177 columns]"
      ]
     },
     "execution_count": 217,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the merged feature table.\n",
    "feature_123.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the remaining id columns and index the table by (date, item_id), so that\n",
    "# all rows of one date can later be selected with feature_123.loc[date].\n",
    "feature_123=(\n",
    "    feature_123\n",
    "    .drop([\"cate_id\",\"cate_level_id\",\"brand_id\",\"supplier_id\"], axis=1)\n",
    "    .set_index(['date',\"item_id\"])\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 简单规则（直接用前一周量为预测量）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "#feature_123.qty_alipay_njhs_sum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 220,
   "metadata": {},
   "outputs": [],
   "source": [
    "#feature_123.loc[pd.to_datetime(\"2015-10-25\"),\"qty_alipay_njhs_sum\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Naive baseline: the quantity stored under 2015-10-25 is used as the prediction\n",
    "# for the quantity stored under 2015-11-01.\n",
    "y_perd=feature_123.loc[pd.to_datetime(\"2015-10-25\"),\"qty_alipay_njhs_sum\"].rename(\"y_perd\")\n",
    "y_test=feature_123.loc[pd.to_datetime(\"2015-11-01\"),\"qty_alipay_njhs_sum\"].rename(\"y_test\")\n",
    "# Align both series on the item index of the prediction date.\n",
    "# pd.concat(join_axes=...) was removed in pandas 1.0; concat followed by\n",
    "# reindex is the documented equivalent.\n",
    "y_perd_test=pd.concat([y_perd,y_test], axis=1).reindex(y_perd.index)\n",
    "# Items with no row under the target date are treated as zero.\n",
    "y_perd_test=y_perd_test.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>y_perd</th>\n",
       "      <th>y_test</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>item_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>132</th>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2181</th>\n",
       "      <td>3</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17097</th>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19329</th>\n",
       "      <td>6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20340</th>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         y_perd  y_test\n",
       "item_id                \n",
       "132           0     0.0\n",
       "2181          3     7.0\n",
       "17097         0     0.0\n",
       "19329         6     2.0\n",
       "20340         0     2.0"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the aligned baseline prediction / ground-truth pairs per item.\n",
    "y_perd_test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5011.304506699147"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean squared error of the naive baseline. sklearn's signature is\n",
    "# (y_true, y_pred); the arguments are passed the other way round here, which is\n",
    "# harmless because MSE is symmetric in its two arguments.\n",
    "mean_squared_error(y_perd_test.y_perd.values,y_perd_test.y_test.values)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 滑窗采样与特征矩阵构建\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 训练集构建\n",
    "\n",
    "2015-10-25之前为训练集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "821"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(y_perd)# too few samples from a single date, so sliding-window sampling is needed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2014-10-10 00:00:00')"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Earliest date present in the raw table; used to choose where the sliding window starts.\n",
    "tp.date.min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2014-10-26', '2014-11-02', '2014-11-09', '2014-11-16',\n",
       "               '2014-11-23', '2014-11-30', '2014-12-07', '2014-12-14',\n",
       "               '2014-12-21', '2014-12-28', '2015-01-04', '2015-01-11',\n",
       "               '2015-01-18', '2015-01-25', '2015-02-01', '2015-02-08',\n",
       "               '2015-02-15', '2015-02-22', '2015-03-01', '2015-03-08',\n",
       "               '2015-03-15', '2015-03-22', '2015-03-29', '2015-04-05',\n",
       "               '2015-04-12', '2015-04-19', '2015-04-26', '2015-05-03',\n",
       "               '2015-05-10', '2015-05-17', '2015-05-24', '2015-05-31',\n",
       "               '2015-06-07', '2015-06-14', '2015-06-21', '2015-06-28',\n",
       "               '2015-07-05', '2015-07-12', '2015-07-19', '2015-07-26',\n",
       "               '2015-08-02', '2015-08-09', '2015-08-16', '2015-08-23',\n",
       "               '2015-08-30', '2015-09-06', '2015-09-13', '2015-09-20',\n",
       "               '2015-09-27', '2015-10-04', '2015-10-11', '2015-10-18',\n",
       "               '2015-10-25'],\n",
       "              dtype='datetime64[ns]', freq='W-SUN')"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 53 weekly, Sunday-anchored target dates from 2014-10-26 to 2015-10-25.\n",
    "# The canonical uppercase alias 'W' is used; it yields the same W-SUN range.\n",
    "r = pd.date_range('2014-10-26', periods=53, freq='W')\n",
    "r"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sliding-window training set: for every weekly date i in r, the feature rows\n",
    "# stored under (i - 7 days) are paired with the target stored under i.\n",
    "r = pd.date_range('2014-10-26', periods=53, freq='W')\n",
    "weekly_blocks=[]\n",
    "for i in r:\n",
    "    x_train=feature_123.loc[i-pd.to_timedelta(7,\"D\"),:]# feature rows\n",
    "    y_train=feature_123.loc[i,\"qty_alipay_njhs_sum\"]# target values\n",
    "    # Align the target on the item index of the feature rows.\n",
    "    # pd.concat(join_axes=...) was removed in pandas 1.0; concat followed by\n",
    "    # reindex is the documented equivalent.\n",
    "    xy_one=pd.concat([x_train,y_train], axis=1).reindex(x_train.index)\n",
    "    # Missing values (e.g. items with no row under the target date) become 0.\n",
    "    xy_one=xy_one.fillna(0)\n",
    "    weekly_blocks.append(xy_one.values)\n",
    "    #print(i-pd.to_timedelta(7,\"D\"),i)\n",
    "# Stack all blocks once instead of re-copying a growing array on every iteration.\n",
    "xy_train_values=np.concatenate(weekly_blocks,axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(23402, 178)"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the stacked training matrix; the last column is the target.\n",
    "xy_train_values.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the stacked matrix into features and target by column position.\n",
    "# The target is the last column (index 177 of 178).\n",
    "# NOTE(review): the slice 0:176 selects columns 0..175, so feature column 176 is\n",
    "# not used by the model - confirm this is intentional. If it is changed, the\n",
    "# test-set slices must be changed identically so the feature counts still match.\n",
    "x_train_0=xy_train_values[:,0:176]\n",
    "y_train_0=xy_train_values[:,177]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 测试集\n",
    "\n",
    "2015-10-25/2015-11-01为测试集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test pair: feature rows stored under 2015-10-25, target stored under 2015-11-01.\n",
    "x_test=feature_123.loc[pd.to_datetime(\"2015-10-25\"),:]\n",
    "y_test=feature_123.loc[pd.to_datetime(\"2015-11-01\"),\"qty_alipay_njhs_sum\"]\n",
    "# pd.concat(join_axes=...) was removed in pandas 1.0; concat followed by reindex\n",
    "# is the documented equivalent and keeps exactly the rows of x_test.\n",
    "xy_test=pd.concat([x_test,y_test], axis=1).reindex(x_test.index)\n",
    "xy_test=xy_test.fillna(0)\n",
    "# Same positional column slices as the training matrix, so that the train and\n",
    "# test feature matrices have identical columns.\n",
    "x_test_0=xy_test.values[:,0:176]\n",
    "y_test_0=xy_test.values[:,177]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Lightgbm 与特征重要性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1]\tvalid_0's l2: 12917.3\n",
      "Training until validation scores don't improve for 5 rounds.\n",
      "[2]\tvalid_0's l2: 11316.9\n",
      "[3]\tvalid_0's l2: 10089.7\n",
      "[4]\tvalid_0's l2: 9220.58\n",
      "[5]\tvalid_0's l2: 8267.85\n",
      "[6]\tvalid_0's l2: 7550.82\n",
      "[7]\tvalid_0's l2: 7048.99\n",
      "[8]\tvalid_0's l2: 6576.03\n",
      "[9]\tvalid_0's l2: 6206.84\n",
      "[10]\tvalid_0's l2: 5962.11\n",
      "[11]\tvalid_0's l2: 5759.33\n",
      "[12]\tvalid_0's l2: 5552.14\n",
      "[13]\tvalid_0's l2: 5405.68\n",
      "[14]\tvalid_0's l2: 5345.49\n",
      "[15]\tvalid_0's l2: 5242.19\n",
      "[16]\tvalid_0's l2: 5125.92\n",
      "[17]\tvalid_0's l2: 5038.97\n",
      "[18]\tvalid_0's l2: 4982.12\n",
      "[19]\tvalid_0's l2: 4947.08\n",
      "[20]\tvalid_0's l2: 4840.35\n",
      "[21]\tvalid_0's l2: 4810.87\n",
      "[22]\tvalid_0's l2: 4787.06\n",
      "[23]\tvalid_0's l2: 4744.68\n",
      "[24]\tvalid_0's l2: 4723.02\n",
      "[25]\tvalid_0's l2: 4665.1\n",
      "[26]\tvalid_0's l2: 4596.17\n",
      "[27]\tvalid_0's l2: 4592.85\n",
      "[28]\tvalid_0's l2: 4561.01\n",
      "[29]\tvalid_0's l2: 4544.31\n",
      "[30]\tvalid_0's l2: 4529.38\n",
      "[31]\tvalid_0's l2: 4526.53\n",
      "[32]\tvalid_0's l2: 4522.68\n",
      "[33]\tvalid_0's l2: 4535.72\n",
      "[34]\tvalid_0's l2: 4528.32\n",
      "[35]\tvalid_0's l2: 4571.82\n",
      "[36]\tvalid_0's l2: 4566.49\n",
      "[37]\tvalid_0's l2: 4573.38\n",
      "Early stopping, best iteration is:\n",
      "[32]\tvalid_0's l2: 4522.68\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n",
       "       importance_type='split', learning_rate=0.1, max_depth=-1,\n",
       "       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n",
       "       n_estimators=100, n_jobs=-1, num_leaves=31, objective='regression',\n",
       "       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n",
       "       subsample=1.0, subsample_for_bin=200000, subsample_freq=0)"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gbm = lgb.LGBMRegressor(objective='regression',num_leaves=31,learning_rate=0.1,n_estimators=100)\n",
    "# Early stopping is configured through a callback: the early_stopping_rounds\n",
    "# argument of fit() was removed in LightGBM 4.0.\n",
    "# NOTE(review): the eval set used for early stopping is the test set itself, so\n",
    "# the reported test error is optimistic; consider a separate validation week.\n",
    "gbm.fit(x_train_0, y_train_0,eval_set=[(x_test_0, y_test_0)],callbacks=[lgb.early_stopping(stopping_rounds=5)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0,  8,  1,  1,  1,  6,  5,  2,  4,  7,  2,  5,  4,  5,  3,  0,  2,\n",
       "        5,  7,  0,  2,  1,  4,  3,  7, 13, 11,  1, 10,  2,  5,  1,  7,  1,\n",
       "        2,  5,  2,  7,  4,  0,  3,  3,  3,  1, 10,  1,  3,  3,  3,  2,  3,\n",
       "        1,  1,  3,  2,  3,  2,  6,  8,  0,  1,  9,  1,  3,  3,  2,  2,  0,\n",
       "        2,  5,  4,  0,  3,  8,  3,  0,  0,  6, 10,  2,  2,  5,  1,  0, 10,\n",
       "        1,  2, 10,  1,  7,  8,  0, 13,  1,  2, 22,  4,  3,  4,  3,  1,  2,\n",
       "        4,  4,  4,  5,  4, 12, 50,  2,  9, 33,  4, 10, 15,  2, 11, 13,  3,\n",
       "        9, 10,  3,  9,  7,  7, 13, 41,  5,  1,  8,  3,  1, 11,  7,  3, 14,\n",
       "        1,  3,  6,  2,  5,  6,  1,  5,  6,  5,  0, 10,  6,  4,  5,  2,  3,\n",
       "        6,  2,  7,  9,  5,  2, 25,  2,  1,  4,  1, 12,  2,  1,  3,  8,  9,\n",
       "       20,  4,  3, 10, 14, 20])"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split-count importance of every input feature of the baseline model `gbm`.\n",
    "gbm.feature_importances_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 864x432 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbMAAAEWCAYAAADsPHnaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3XucVWX5///XGzBTKIQQQ1DGAyCCOIamfk0cJMhTmGUiaqGoZeaxPGBlH/TjAUPyEP7KM5jmsVAgUxEZ60NagAKCRiiMQpxUJAERAa/fH/caXLPZe8+eYfZee8++no/Hfrj2vU7XPeDcrNN7ycxwzjnnSlmLpAtwzjnntpcPZs4550qeD2bOOedKng9mzjnnSp4PZs4550qeD2bOOedKng9mzjnnSp4PZs41MUk1kjZIWhf77L4d26uStLQpa8xxvyMlPVjo/TrXGD6YOZcf3zSzNrHPsqQKkdSqEOs4lyQfzJwrEEmHSfq7pDWS5kiqis07S9IbktZKWiTph1F7a+AvwO7xozxJ4yRdF1u/ztFbdHR4paS5wHpJraL1/ijpXUmLJV0UW36kpCckPSjpQ+A84GfAkGifc7LVGa9B0k8lrZK0XNJZsfk7SRoj6W1J/5X0f5J2yuFnc2a0r7VR3ac33Z+Kay78X1/OFYCkzsCfge8BzwADgD9K2s/M3gVWAScAi4B+wF8kzTCzVyQdCzxoZl1i28tlt0OB44H3gE+BScBTUXsX4HlJC8zs2Wj5E4HvAt8HdgQ6APua2RmxbWasM5r/ZaAt0BkYCDwh6Ukz+wC4GegF/D9gBXAo8Gm2nw3wEXA7cIiZLZDUCWifS+ddefEjM+fy48noKGONpCeBM4CnzexpM/vUzKYAM4HjAMzsz2b2lgUvAs8BR25nDbeb2RIz2wAcAuxqZtea2Sdmtgi4Gzg1tvxLZvZkVN+GdBvMoc5NwLVmtsnMngbWAT0ktQCGAxeb2X/MbIuZ/d3MNtb3syEMxL0l7WRmy81s/nb+XFwz5IOZc/nxLTPbJfp8C+gKfDc2wK0BvgZ0ApB0rKSXJa2O5h1HODLaHkti010Jpyrj+/8ZsFuG5dPKoc73zWxz7PtHQJtomc8Db6XZbMafjZmtB4YQTnsul/Tn6IjNuTr8NKNzhbEE+L2ZnZs6Q9KOwB8Jp/eeMrNN0dFc7bnEdK+2WA/sHPv+5TTLxNdbAiw2s25ZakzdT53vOdSZzXvAx8A+wJyUeRl/NgDRadBno+tr1xGOKLf3qNU1M35k5lxhPAh8U9I3JLWU9PnohokuwOcI16jeBTZH18gGxdZdCXxJUttY22zgOEntJX0ZuKSe/f8T+DC6KWSnqIbekg7Jss5KoCI6RUgOdWZkZp8C9wG/jm5EaSnp8GiAzPizkbSbpMHRjTAbCactt+SyT1defDBzrgDMbAnhBoufEQaDJcDlQAszWwtcBDwGfACcBkyMrfsv4GFgUXQabnfg94QjnBrCdatH69n/FuCbQCWwmHCkdA/hZo1MHo/++76kV+qrMweXAa8BM4DVwE2E/mf82USfnwLLonWOAs5vwD5dmZC/nNM551yp8yMz55xzJc8HM+eccyXPBzPnnHMlzwcz55xzJc+fMyuQXXbZxfbdd9+kyyio9evX07p166TLKCjvc3nwPhfOrFmz3jOzXetbzgezAtltt92YOXNm0mUUVHV1NVVVVUmXUVDe5/LgfS4cSW/nspyfZnTOOVfyfDBzzjlX8nwwc845V/J8MHPOOVfyfDBzzjlX8vxuRuecc9utoqKCL3zhC7Rs2ZJWrVoxc+ZMVq9ezZAhQ6ipqaGiooLHHnuMdu3a5WX/eT0yk/RlSY9IekvS65KeltQ9w7IVkubls55MJF0vaYmkdSntO0p6VNKbkv4hqSJq30HSeEmvSXpD0lVJ1O2cc8Vk2rRpzJ49e+tjSKNGjWLAgAEsXLiQ
AQMGMGrUqLztO2+DmSQBE4BqM9vHzPZn2zfbFotJwFfTtJ8NfGBm+wK3EF5ZAfBdYEczOwDoC/ywdqBzzjkXPPXUUwwbNgyAYcOG8eSTT+ZtX/k8zdgf2GRmv6ttMLPZCkYDxxLeZHudmdV5F5OkM4GDzeyC6Ptk4GYzq46Onu4Avk54p9LPgF8BewKXmNnEaP3BhDfx7gNMMLMrMhVqZi9H+0mddSIwMpp+AhgbDdIGtJbUCtgJ+AT4MNsPY8OmLVSM+HO2RZqdnx6wmTO9z82e97k8jDsme/qHJAYNGoQkfvjDH/KDH/yAlStX0qlTJwA6derEqlWr8lZfPgez3sCsNO3fJrwg8ECgAzBD0l8bsN3WhKO9KyVNILxGfSCwPzCez14WWAkcRHg77QJJv4leAtgQnQkvCsTMNkv6L/AlwsB2IrCcMGBeamarU1eW9APgBwAdOuzKLw/Y3MDdl7bddgr/05cT73N5KMc+r1u3jurq6ozzR48eTYcOHfjggw+47LLL2LBhA5s3b66zTur3ppTEDSBfAx6O3ny7UtKLwCHA3BzX/wR4Jpp+DdhoZpskvQZUxJabamb/BZD0OtCVaGBqgG0O1QhHZV8lvLp9d6Ad8DdJz5vZojoLmt0F3AXQo0cPu/D0Exu4+9JWXV3NKWUY+eN9bv7Ktc+5xlnNmTOHTZs20blzZ3r06EGnTp1Yvnw5u+++e94isfJ5A8h8wvWkVOkGiFSbqVvb52PTm+yz12N/Sjjywsw+pe7gvDE2vYXGDdxLgT0AolOKbQmvbj8NeMbMNpnZKmA6cHAjtu+ccyVv/fr1rF27duv0c889R+/evRk8eDDjx48HYPz48Zx4Yv7+QZ/PwewFYEdJ59Y2SDqEcJ1riKSWknYF+gH/TFm3BqiU1ELSHqS/OaMQJgLDoumTgReigfQd4Ojo+l9r4DDgXwnV6JxziVq5ciVf+9rXOPDAA/nqV7/K8ccfzzHHHMOIESOYMmUK3bp1Y8qUKYwYMSJvNeTtNKOZmaSTgFsljQA+JgxSlwBtgDmEU3ZXmNmKlLsBpwOLCacR5wGv5KtOAEm/Ihxt7SxpKXCPmY0E7gV+L+lNwhHZqdEqdwD3R7UJuN/Mcj1N6pxzzcree+/NnDlztmn/0pe+xNSpUwtSQ16vmZnZMuCUNLMujz7xZWsIN40QHf2cnmGbbWLTI9PNM7NxwLhY+wn11HkFsM3djmb2MeE2/NT2denanXPOJcPjrJxzroC2bNnCQQcdxAknhH9jjx07ln333RdJvPfeewlXV7rKajCLUjxmp3wOSLou51z5uO222+jZs+fW70cccQTPP/88Xbt2TbCq0ldWcVZmdqiZVaZ8XssSZ/WTqO65kqZK6hqbt6ek56I4q9c9AcQ5V5+lS5fy5z//mXPOOWdr20EHHURFRUVyRTUTebtmFouzGm9mp0ZtlYQ4q3/na7+NNAkYCyxMaX+VkETykaQfEZJGhkTzHgCuN7MpktoQHhPIyBNAyoP3uTxk6nPNqOOzrnfJJZfwq1/9autt7K7peJwVmeOszGxa7OvLwBnRcvsDrcxsSrRcnSO6WD88AcT73Ox5nz+TLd3ipZdeYtOmTaxdu5bZs2fz/vvv11n+448/Zvr06bRt2zYPFW+/+hJAEmdmefkAFwG3pGn/DjAFaEk4SnsH6ERI75gXLXMmMDa2zmSgKpo24NhoegLwHLADIR5rdmz9RYSHnD8PvA3skUPN67LMGwv8Ipr+VlTTnwhHb6OBltm23b17dys306ZNS7qEgvM+l4fG9HnEiBHWuXNn69q1q+22226200472emnn751fteuXe3dd99twiqbVlJ/zsBMy2HMSeIGkK1xVma2EqiNs8pVapzVi2a2KZquiC031cz+a+H2+to4q0aRdAYh4WN01NQKOBK4LKp9b8IA6pxzad14440sXbqUmpoaHnnkEY4++mgefPDBpMtqNjzOqh6Svg78HBhsZrXbXAq8
amaLzGwz8CTwlcZs3zlX3m6//Xa6dOnC0qVL6dOnT52bQ1zuPM4qC0kHAXcSBrL4uwtmAO2i+gGOJhz9Oedcvaqqqpg8eTIAF110EUuXLmXz5s0sW7aMe+65J+HqSlPeBrPo6OkkYGB0a/58wrvB/kBIyJ9DGPCuMLMVKavH46xupgBxVlGM1c6SlkoaGc0aTYjeejx6Jm1i1LcthFOMU6O0fgF357NG55xzmXmcFVnjrL6eZZ0pQJ9s23XONY2PP/6Yfv36sXHjRjZv3szJJ5/MNddcw9lnn83MmTNrb7Ji3LhxtGnTpv4NumanrBJAnHOlaccdd+SFF15gzpw5zJ49m2eeeYaXX36ZW265hTlz5jB37lz23HNPxo4dm3SpLiFllQCSKc4qSwJIP0mvSNos6eSUec9IWhM9A+ecyyNJW4+4Nm3axKZNm5DEF7/4RSA8YrRhw4ZtnhV15aOsEkDM7NB07dE7ydIlgLxDuOX+sjSrjSY8lP3DXPbtCSDlwfvcePWlZ2zZsoW+ffvy5ptv8uMf/5hDDw3/O5911lk8/fTT7L///owZM2a763ClSZ/d5d7EG5aOBkaaWb+UdhESO+okgETZhpPNrHehE0Bita2LX5OLtY+Lansipb0KuCzTNbmUBJC+v7y1vO4R2W0nWLkh6SoKy/vceAd0zi35Yt26dVx99dVcdNFF7LXXXkAY6G6//Xb2228/jj322O0vJocayu3aXFJ97t+//ywzO7i+5fJ5A0hvYFaa9m8DlYTEjg7ADEl/bcB2WwPVZnalpAnAdcBAYH9gPOHt0ET7OIjwvNkCSb8xsyWN6kkjmdldwF0APXr0sAtPz98rw4tRdXU1p1RVJV1GQXmfC2PWrFm8//77nHXWWVvbWrVqxejRo7npppvyvv/q6mqqyvDPuZj77Akgzrmi9+6777JmzRoANmzYwPPPP0+PHj148803gXDNbNKkSey3335JlukSlM8js/nAyWna85YAIqnJE0Ccc8lbvnw5w4YNY8uWLXz66aeccsopHH/88Rx55JF8+OGHmBkHHnggv/3tb5Mu1SUkn7/gXwBukHSumd0N2ySAjAfaExJALqfugFUDnC+pBdCZhBJAnHPFoU+fPrz66qvbtE+fPj2Balwx8gQQMieASDokav8ucGfUh9p1/gY8DgyI1vlGPmt0zjmXmSeAkDUBZAbQJcM6R2bbpnPOucLxBBDnmrklS5bQv39/evbsSa9evbjtttsAGDlyJJ07d6ayspLKykqefvrphCt1rvHK6qYISf8Adkxp/p6ZvZZEPc4VQqtWrRgzZgxf+cpXWLt2LX379mXgwIEAXHrppVx2WbpMAOdKS1nFWZnZoWZWmfJ5LUuc1ZmS3o1FX50Tm3eTpHnRZ0g+63Zue3Tq1ImvfCW8bu8LX/gCPXv25D//+U/CVTnXtMoqziqLSaSPswJ4tDaJpJak4wkv46wkHOm9KOkvZvZhph14nFV5SLLP9cVBAdTU1PDqq69y6KGHMn36dMaOHcsDDzzAwQcfzJgxY2jXrl0BKnWu6XmcVd3a6sRZpdYRa78c2NHMrou+3ws8a2aPpSzncVYe7VQw9cVBbdiwgYsvvpgzzjiDfv36sXr1atq2bYsk7rvvPt5//32uvPLKBu/Xo53KQ7HHWWFmefkAFwG3pGn/DjAFaEk4SnsH6ERI75gXLXMmMDa2zmSgKpo24NhoegLwHLADIR5rdmz9RUBbwvNrbwN75FDzupTvZwLLCY8SPFG7DWAQ4fGBnQmRXIuAn2bbdvfu3a3cTJs2LekSCq5Y+/zJJ5/YoEGDbMyYMWnnL1682Hr16tWobRdrn/PJ+1w4wEzLYczxOKvsJgEVZtYHeJ6Q/YiZPQc8DfwdeBh4iZBa4lzRMTPOPvtsevbsyU9+8pOt7cuXL986PWHCBHr37p1Eec41CY+zysLM3o99vRu4KTbveuB6AEl/IP31NucSN336dH7/+99zwAEHUFlZ
CcANN9zAww8/zOzZs5FERUUFd955Z8KVOtd4HmeVhaROZlb7z9fBwBtRe0tgFzN7X1IfoA/hdKdzRedrX/san/377zPHHXdcAtU4lx95G8zMzCSdBNwqaQTwMWGQugRoQ4izMqI4q+gGkFrxOKt5FCDOCjiNKM4KuMdCushFkgYTjhRXE66hQbhG97forbYfAmeYmZ9mdM65hHicFVnjrK4CrkrT/jHh/WnO5c2SJUv4/ve/z4oVK2jRogU/+MEPuPjii7n88suZNGkSn/vc59hnn324//772WWXXZIu17lEeZyVc0WqNrnjjTfe4OWXX+aOO+7g9ddfZ+DAgcybN4+5c+fSvXt3brzxxqRLdS5xiQxmSSWDSPpHLM2j9nNAluWHSJoraX50KrK2PWMyiHNNJVNyx6BBg2jVKpxUOeyww1i6dGmSZTpXFAqezZhkMoiZHZrrspK+BIwG+prZu5LGSxpgZlOjRbZJBsnGE0DKQ0P7nEtqB9RN7oi77777GDLE09ScSyJouD/h9vrf1TaY2WwFo0lJBomvWOBkkL2Bf5vZu9H35wkPfE/NsPw2UhJA+OUB5XWPyG47hV/u5aShfa6urq53mdrkjnPOOYdXXvnsXqgHH3yQNWvW0Llz55y2ky/r1q1LdP9J8D4XoVyerG7KDyWSDAK0A5ZG+28F/BGYFNvONskg2T6eAFIemrrPmZI7xo0bZ4cddpitX7++SffXGP7nXB48ASR3RZUMYmYfAD8CHgX+RnisoPaf3GmTQZxrSpYhueOZZ57hpptuYuLEiey8884JVuhc8UhiMJsP9E3TnrdkEOqeTs05GcTMJll4bczhwAKilA8ze9/MardzN+n749x2qU3ueOGFF+q8QPOCCy5g7dq1DBw4kMrKSs4777ykS3UucUlcMyuZZBBJHc1slaR2wPlEz8xlSgZxril5codzuSv4YGZWOskgwG2SDoymrzWz2rstMyWDOOecS0ASR2allAwyNEN72mQQ57LJlOixevVqhgwZQk1NDRUVFTz22GP+kkznGqiYbgBxrlnLlOgxatQoBgwYwMKFCxkwYACjRo1KulTnSk5eB7Okkj4aStJ/JH0iaUs8GUTSLbHv/5a0JrZOfNmJSdTtSkumRI+nnnqKYcOGATBs2DCefPLJJMt0riTl7TRjkkkfjfAdwjNnC82sMtZ+ae2EpAuBg2LzNqQsm5UngJSHcce0zmm5eKLHypUr6dSpExAGvFWrVuWzROeapXxeMyuVpA/M7OVoP9n6MxT4n9y77wkg5ZgAkktKQmqix+bNm+usk/q92BV9MkQeeJ+LTz4Hs97ArDTt3wYqCckcHYAZkv7agO22BqrN7EpJE4DrgIGEV7KMB2pP+VUSjqQ2Agsk/cbMljSmI5K6AnsRHiuo9XlJMwl3NI4ys23ODZnZXcBdAHvuva+NeS2R+20S89MDNlNufR53TGuqqqoyzt+0aRMnnHAC55133tYHoTt37kyPHj3o1KkTy5cvZ/fdd8+6jWJTXV1dUvU2Be9z8UniN83WpA9gpaTapI+5Oa6fmvSx0cw2SUqb9AEgqTbpo1GDGXAq8ERUc609zWyZpL2BFyS9ZmZvZdrATju0ZEGOobLNRXV1NTWnVyVdRkFl+5drpkSPwYMHM378eEaMGMH48eM58cQTC1Cpc81LPm8AKZmkjxycCjwcb4geL8DMFgHV1L2e5tw2MiV6jBgxgilTptCtWzemTJnCiBEjki7VuZKTzyOzkkn6yEZSD0Lo8EuxtnbAR2a2UVIH4AjCdTvnMsqU6AEwdWrOL2NwzqWRt8GslJI+ohdvngbsLGkpcE/sweuhwCNW97dQT+BOSZ8SjiBHmdnr+azROedcZnm9ZlZCSR9XAGnvdkzdR9T2dyDjG6qdc84VlieAOFcgS5YsoX///vTs2ZNevXpx2223AbB69WoGDhxIt27dGDhwIB988EHClTpXespqMJP0j1hqx9akj6TrcuXB
46ycy5+yirOK3k1WmfJ5TdL1kpZED2THa8oWZzVM0sLoMyyfdbvmweOsnMsfj7MKJgFjiV6+WcvM0sZZSWpPSAM5mHATyyxJE6O3U6flcVblweOsnEuGx1nRqDirbwBTzGx1tN4U4BhSnkXzOCuPs0rH46xKn/e5CJlZXj7ARcAtadq/A0wBWhKO0t4BOhHSO+ZFy5wJjI2tMxmoiqYNODaangA8B+xAiMeaHVt/EdCW8Pza28AeOdS8LkN7V2A50DL6fhnwi9j8q4HLsm27e/fuVm6mTZuWdAkFV1+fP/nkExs0aJCNGTNma1v37t1t2bJlZma2bNkyK7W/K/7nXB6S6jMw03IYc5K4AWRrnJWZrQRq46xylRpn9aKZbYqmK2LLTTWz/5rZx0BtnFVjpcZZpTuES/80rHMRqyfOCvA4K+cayeOscpMaZ7UU2CP2vQuwbDu278qAx1k5lz8eZ1WPdHFWwLOEvtW+234QcFWha3OlxeOsnMsfj7Oi4XFWZrZa0v8CM6Kmay26GcQ551zheZwVDY+zitrvA+7Ltl1XnIYPH87kyZPp2LEj8+aFRxtHjhzJ3Xffza677grADTfcwHHHHZdkmc65BiirBBDnAM4880yeeeaZbdovvfRSZs+ezezZs30gc67ElFUCSKY4qywJIP0kvSJps6ST02zvi5L+I2lsPut2Tatfv360b98+6TKcc02orBJAzOzQdO2SWpMmAYTwDNyZhOfK0vlfwqMF9fIEkMKqacRbvceOHcsDDzzAwQcfzJgxY2jXrl39KznnioIngJA5ASS6jkf03rI6JPUlDMzPEGKttuEJIMklgNSXVLBixQrWr1+/dbk+ffpw7733Ion77ruP0047jSuvvLLB+y36lIQ88D6Xh6Lvcy5PVjfmQ/NKABkHnBz73gKoJjxrVqfWTJ9SS3VoCsWckrB48WLr1atXg+fVp5j7nC/e5/LgCSDbKsUEkFTnA0+b2ZIm3KZL0PLly7dOT5gwgd69eydYjXOuofJ5mnE+sM1NE+QxAURSvhJAUh0OHCnpfMIzc5+TtM7MPLqhBAwdOpTq6mree+89unTpwjXXXEN1dTWzZ89GEhUVFdx5551Jl+mcawBPAGkEM9v6DFzs+p4PZCXi4Ycf3qbt7LPPTqAS51xTydtpxujo6SRgYHRr/nxgJPAHYC4hAeQFogSQlNXjCSA3U4AEkCj5Y2dJSyWNjNoPidq/C9wZ9cE551yR8QQQMieAmNkMQohwtnXr7MsVj3RJH7VuvvlmLr/8ct599106dOiQUIXOuabS4CMzSe0k9clHMc41pUxJH0uWLGHKlCnsueeeCVTlnMuHnAYzSdVR2kV7wunB+yX9Oof1Sj0B5JbYcv+WtCZlvieAFLFMSR+XXnopv/rVr+p7s7hzroTkepqxrZl9KOkc4H4z+x9Jc7Ot0BwSQMzs0tgyFwIHpayacwKIKw4TJ06kc+fOHHjggUmX4pxrQrkOZq0kdSJc//p5juuUfAJIiqHA/8RqrDcBJM7jrPKjIbFVH330Eddffz3PPfdcHityziUh18HsWsILKaeb2QxJe7NtjmGq3sCsNO3fBioJiR0dgBmS/ppjHQCtgWozu1LSBOA6YCCwPzAemBgtV0k4ktoILJD0m8Y+5CypK7AX4e5LokcGxgDfAwZkWc/jrPLc54bEVi1atIh///vf9OjRA4B3332XXr168dvf/rbJgoeLPvInD7zP5aHY+5zTYGZmjwOPx74vIsRSNcbWBBBgpaTaBJCspy1jUhNANprZJklpE0AAJNUmgDQ2seNU4ImoZoglgGQ7mjOzu4C7AHr06GEXnn5iI3dfmqqrqzmlqirRGmpqamjdujVVVVVUVVUxfPjwrfMqKiqYOXNmk97NWF1dTVXCfS4073N5KPY+53oDSHdJU2tv0JDUR9Iv6lltPtA33eZy2GWjEkCoOzg3ZQLIqUD8SdvDgQsk1RCeg/u+pFHbsX2XB0OH
DuXwww9nwYIFdOnShXvvvTfpkpxzeZLrL/i7Cc+F3QlgZnMl/YFwii+TZpEAIqkH0A54qbbNE0BKQ7qkj7iamprCFOKcy7tcnzPb2cz+mdKW9WJIc0gAiQwFHokdDTrnnCsyuR6ZvSdpH8Ldh0RvXV6efZXSTwBJt4808+vsyznnXOHlemT2Y8Ipxv0k/Qe4BDgvb1U51wSGDx9Ox44d077O5eabb0YS7733XgKVOeeaWr2DWXTd6mAz+zqwK7CfmX3NzN7Oe3VNLFMCSNJ1ufzwOCvnyke9g1l0l+AF0fR6M1ub68aLLc7KzA41s8qUz2tZ4qzOk/RaNOj9n6T9Y7VuiA2Iv0u/R5ckj7Nyrnzkes1siqTLgEeB9bWNZrY60wrFGGeVxSTSxFkBf6hNMJE0GPg1cEw07y0zq8x1B54Akh8NSQABj7NyrrnKdTCrfdL0x7E2A/bOsk7Jx1mZ2Yexr62jenPmCSDFlQDy8ccfc+WVVzJ69Oit36dPn07btm2brJ5iT0nIB+9zeSj6PptZXj7ARcAtadq/A0wBWhKO0t4BOhHSO+ZFy5wJjI2tMxmoiqYNODaangA8B+xAiMeaHVt/EdCW8Pza28AeOdS8Lk3bj4G3COkh3aK2CsIR6quEoOEj69t29+7drdxMmzYt6RJs8eLF1qtXLzMzmzt3ru26667WtWtX69q1q7Vs2dL22GMPW758eZPtrxj6XGje5/KQVJ+BmZbDmJPTkZmk72cYCB/IZf0UJRVnZWZ3AHdIOg34BTCM8FjCnmb2fhQ4/KSkXlb3SM4VmQMOOIBVq1Zt/Z6POCvnXDJyvTX/kNjnSMLDz4PrWac5xVkBPAJ8K9rXRjN7P5qeRThyS3tji0uOx1k5Vz5yDRq+MP5dUlvg9/WsVvJxVpK6mVntTSHHE90gImlXYLWZbYneINCNcFrTFRGPs3KufDT2aOUjwi/wjMzMJJ0E3CppBPAxYZC6BGhDiLMyojgrSRWx1eNxVvMoQJwVcBpRnBVwj4XkjwskfR3YRBiEh0Wr9AOulbSZcNR3nmW5s9M551x+5XrNbBKf3cnXgvDusMczrxFYicdZmdnFGZb/I/DHbNt0hTN8+HAmT55Mx44dmTcvPKp49dVX89TGZyqxAAAeqUlEQVRTT9GiRQs6duzIuHHj2H333ROu1DmXL7leM7uZ8DLKMcCNQD8zuzJvVTnXAOmSPi6//HLmzp3L7NmzOeGEE7j22msTqs45Vwi5DmbHmdmL0We6mS2VdFN9KxVbAkimOKssCSA/ieqeG73PrWts3pbYNiZuuzdXKOmSPr74xS9unV6/fr2nfTjXzOV6zWwgkHokdmyatq2KMQHEzA5N1y6pNekTQF4lPLz9kaQfER7OHhLN22CeAJJVUyaANDTpA+DnP/85DzzwAG3btmXatGlNUodzrjjps7vc08wMv8DPJyR9vBWb9QVgupmdkWXdo4GRZtYvpV2EQaFOAkh0A8hkM+td6ASQWG3r4tfkUuYdRHiQ+4j6lo2tE08A6fvLW++ur4RmZbedYOWGptnWAZ2zp3SsWLGCq666ivvvv3+beQ899BCffPIJZ511VtMUk8W6deto0ybrX4tmx/tcHpLqc//+/WeZ2cH1LpjtiWpCgkYF8DDhoePaT/v6nsammSSAxOaNBX4R+74ZmAm8DHyrvm17Akh+xZM+UtXU1GSc19Q8GaI8eJ8Lh6ZIALGQoPFfwtuWkdQxGhzaSGpjZu/UO1puq6QSQKL1zwAOBo6KNe9pZsui58xekPSamb2Vfguu0BYuXEi3buHpkYkTJ7LffvslXJFzLp9yvTX/m4TE+N2BVYSB4Q2gV5bV5gMnp9tcDrtsVAKIpCZPAImeM/s5cJSZbd2mhccOMLNFkqqBg6h7KtYVyNChQ6murua9996jS5cuXHPNNTz99NMsWLCAFi1a0LVrV373O39Lj3PNWa6/4K8DDgOeN7ODJPUnOlrLojkkgBxEeMP2
MWa2KtbeDvjIzDZK6gAcQbhu5xKQLunj7LPPTqAS51xScr01f5OFLMIWklqY2TQg65180dHTScDA6Nb8+YRMxz8QTinOIQx4V5jZipTV4wkgN1OABJAo+WNnSUsljYxmjSaklTyecgt+T2CmpDnANGCUmb2ezxqdc85lluuR2RpJbYC/AQ9JWkU4FZiVlX4CyNczLP934IBs23SF4wkgzrlcj8xOJOQxXkK4+eIt4Jv5Ksq5hvAEEOdcToOZma0H9iDcHj8euIdwV2FWzSABpJ+kVyRtlnRyrL1/yjY+lvStfNbuMvMEEOdcrncznkt4+Lc94SHkzsDvgAFZ1mkOCSDvEJ5ZuyxlO1uvGUpqD7xJeN7NFRFPAHGufOR6zezHhDsK/wFgZgujZ86y6U+4cWTrPdFmNlvBaFISQOIrFjoBxMxejvaT2l4TtX+apZ8nA38xs4+y/TA8zmr7NCbO6vrrr+f666/nxhtvZOzYsVxzzTVNUotzrvjkOphtNLNPan/ZR89zZc7BCnoDs9K0f5twVHMg0AGYIemvOdYB0BqoNrMrJU0gPDYwkPBamvFA7R2HlYRnvzYCCyT9xswa9dB0PU4lPIO3jZQ4K355QL33zDQru+0UBrSmUF1dnXX+ihUrWL9+fdrl9tprL6666ir69+/fJLVks27dunprbW68z+Wh2Puc62D2oqSfATtJGkjIa5zUyH2WXAJIJpI6Ee5qfDbdfDO7C7gLoEePHnbh6Sc25e6LXnV1NadUVRVkXzU1NbRu3ZqqaH/xBJDf/OY39O3bd+u8fKquri7IfoqJ97k8FHufcx3MRgBnEwaPHwJPE24CyaZZJIDU4xTCKcxNedi2y5EngDjnsv6Cl7Snmb1jZp8Cd0efXJV8AkgOhgJXJV1EufMEEOdcfbfmP1k7IemPDdlwc0gAkXRI1P5d4M6oD7XrVBAeV3gxn7U555yrX32n3uKnBPdu6MabQQLIDKBLhnVqCEeNzjnnElbfkZllmHauaAwfPpyOHTvSu3fvrW1XX301ffr0obKykkGDBrFs2bIEK3TO5Vt9g9mBkj6UtBboE01/KGmtpA8LUWBTypQAknRdbvt4nJVzrr6Xc7bMx04lfRm4lXBL/kbCDR+XmNk2ySDRtanJZtY7dV5DZUoAyVLnEMK7zFoCf6598FrST4BzCHddvgsMN7O3t7c+1zj9+vWjpqamTpvHWTlXXvJxu3pWxRhzlY6kLxFeAdPXzN6VNF7SADObCrxKSCj5SNKPCAkkQ7JtzxNAtk9jEkA8zsq58qHPHtkq0A6lo4GRZtYvpV2EQaFOzFX8yKyQMVfRYwQ31r4GRtL3gMPN7PyU5Q4CxprZEWm2EU8A6fvLWxvyZEPp220nWLmhabZ1QOe2WeevWLGCq666ivvvv3+beQ899BCffPIJZ511VtMUk8W6deto06ZN/Qs2I97n8pBUn/v37z/LzA6ud0EzK+gHuAi4JU37d4AphFN6uxFCfjsRUj3mRcucSRg4ateZTEjyhzAAHhtNTyAE/+5AiM2aHVt/EdCW8Fzb28AeGepsByyN9t8K+CMwKc1yY4Ff1Nfv7t27W7mZNm1awfa1ePFi69WrV9p5NTU1Gec1tUL2uVh4n8tDUn0GZloOY0uu7zMrhK0xV2a2kvD81iENWD815upFC8kcaWOuzOxjoDbmahtm9gHwI+BRwktJa0h5IamkM4CDCacjXRFZuPCzFyBMnDiR/fbbL8FqnHP5VvBrZpRQzJWZTSLKoIxOGW7ZWqz0dcLNIUeZ2cb0W3CF4HFWzrkkBrOSibmS1NHMVklqRwhXPiVqPwi4EzjGzFblswZXP4+zcs4VfDAzM5N0EnCrpBHAx0S35gNtCDFXRhRzFd0AUiseczWPPMdcAbdJOjCavtY+e3RgdFTr49Et3++Y2eA81+Kccy6DJI7MSinmamiG9q9nW88V1vDhw5k8eTIdO3Zk3rx5QEgAeeqpp2jRogUdO3Zk
3Lhx7L777glX6pzLl2K6AcS5RvEEEOdcIoOZpC9LeiRK039d0tOSumdYtkLSvDzXky7m6hBJf5b0L0nzJY2KLX+mpHdjy56Tz/pcdv369aN9+/Z12jwBxLny4gkgpI+5krQz4YHsaZI+B0yVdKyZ/SVa5FGLHt7OhSeAbB9PAHHOZeMJIBkSQNLUfRvh4e27U+vIso4ngHgCSLPnfS4PngBSogkgKbXtEq23d2w7ywkvGX0il214Akh+eQJIcrzP5cETQHJXVAkgtaIHrh8GbjezRVHzJKDCzPoAzwPjG1CnKwBPAHGuvHgCSP0/g7uAhWZ2a22Dmb0fm383cFMOtbs88QQQ55wngGQh6TrCKclzUto7mdny6Otg4I181uGy8wQQ55wngGQgqQshe/FfwCvRrd1jzewe4CJJgwlHiqsJ19Ccc84lxBNAMte4lAynPs3sKuCqTOs655wrrGK6AcS5rIYPH07Hjh3p3bv31rbLL7+c/fbbjz59+nDSSSexZs2aBCt0ziUlr4NZsSV9ZCLpP5I+kbQllupxQDTvlKj2+ZL+kLLeF6N1xyZRd7lJF1s1cOBA5s2bx9y5c+nevTs33nhjQtU555KUt8EslvRRbWb7mNn+hAeZd8vXPrfDdwi36G8ws8ro85qkboTTiUeYWS/Cdb24/yU8QuAKIF1s1aBBg2jVKpwtP+yww1i6dGkSpTnnEpbPa2b9CbfLb70n2sxmKxhNStJHfMVCJ32Y2cvRflJnnQvcYeGt01js3WWS+hIG5mcIb5vOyuOs6teYyKq4++67jyFDhmzXNpxzpSmfg1lvYFaa9m8DlYRkjg7ADEl/bcB2WxOO9q6UNAG4DhgI7E94eHlitFwlcBDhubIFkn5jZksa2IfuAJKmE5JJRprZM9GjAWOA7wEDMq2cEmfFLw/Y3MDdl7bddgoDWq6qq6vrXWbFihWsX79+m2UffPBB1qxZQ+fOnXPaTr6sW7cu0f0nwftcHoq9z0nczbg16QNYKak26WNujuunJn1sNLNNktImfQBIqk36aOhg1groBlQBXYC/SeoNnAE8bWZLsqWxm9ldhIeu6dGjh114+okN3H1pq66u5pSqqibdZk1NDa1bt6Yqtt3x48czf/58pk6dys4779yk+2uo6urqOrWVA+9zeSj2PudzMCu1pI90lgIvR7FYiyUtIAxuhwNHSjqf8Gzc5yStM7MRjdiH2w7PPPMMN910Ey+++GLiA5lzLjn5vJvxBWBHSefWNqQkfbSUtCsh6eOfKevWAJWSWkjagzwnfWTxJOHaH5I6EE47LjKz081sTzOrAC4DHvCBLP+GDh3K4YcfzoIFC+jSpQv33nsvF1xwAWvXrmXgwIFUVlZy3nnnJV2mcy4BeTsyK5WkDwBJvwJOA3aWtBS4J3rw+llgUHSacgtweUouoysgj61yzmWS12tmpZD0Ec2/Atjmbseojp9En0zr1tmXc865wvMEEFcyPAHEOZdJWQ1mkv4RS/iok/Thip8ngDjnMimrOCszOzSW8BFP+rhe0pLogex4TTtKelTSm9FAWBG1n54yIH4qqTKftTtPAHHOZZa3a2axOKvxZnZq1FZJSM34d77220iTgLHAwpT2s4EPzGxfSacSXsI5xMweAh4CiI7snjKz2dl24Akg9fMEEOdcY3mcFVnjrE4ERkbTTwBjJSn2nBvAUGDb2+zwBBBPACkP3ufyUPR9NrO8fICLgFvStH8HmEKIh9oNeAfoREjvmBctcybhRZi160wGqqJpA46NpicAzwE7EOKxZsfWX0R4S/TngbeBPXKoeV3K93lAl9j3t4AOKcu8BfSub9vdu3e3cjNt2rQm3+bixYutV69eddrGjRtnhx12mK1fv77J99dQ+ehzsfM+l4ek+gzMtBzGHI+zyi5dWsnWozJJhwIfmVkir65xngDinAvyeQPIfKBvmva8xVlR97RpU8VZ7QEQRWW1BVbH5p9KhlOMrul5AohzLpN8
Hpm9ANwg6Vwzuxu2ibMaD7QnxFldTt0BqwY4P0qn70xycVYTgWHAS4ScyRdqB9Kotu8S6ncF4AkgzrlMPM6KrHFW9wK/l/Qm4Yjs1Nhq/YClZrYon7U555yrn8dZkTXO6mPC0Ve6daqBw7Jt1zXO8OHDmTx5Mh07dmTevHA58vHHH2fkyJG88cYb/POf/+Tgg+t9H6pzroyUVQKIKw3pkj569+7Nn/70J/r187O6zrltlVUCSKY4qywJIOdJei1a7v8k7R+1D5Q0K5o3S9LR+ay73KRL+ujZsyc9evRIqCLnXLErqwQQMzs0Xbuk1qRPAPmDRQ99SxoM/Bo4BngP+KaZLYvePP0s4UaVjDwB5DPbm/ThnHOpPAGEzAkgZvZh7GvrqF7M7NVY+3zg85J2NLP44wCeAJIhAWR7kj7WrFnDrFmzWLduXfoVE1b0KQl54H0uD0Xf51yerG7Mh2aQABK1/ZiQ8rEE6JZm/snA8/Vt2xNAGiZd0oeZ2VFHHWUzZszYjqryy5MhyoP3uXDIMQEkiRtAtiaAmNlKoDYBJFepCSAvmtmmaLoittxUM/uvhTsSaxNAGszM7jCzfYArgV/E50nqRQgf/mFjtu2cc65peAJI7h4BvlX7RVIXwpHh983sre3ctotJl/QxYcIEunTpwksvvcTxxx/PN77xjaTLdM4VEU8AyUJSNzOrvSnkeKIbRCTtAvwZuMrMpidRW3OWLukD4KSTTipwJc65UpG3I7Po6OkkYGB0a/58wutU/kAIFZ5DGPCuMLMVKavHE0BupgAJIFHyx86SlkoaGc26QNJ8SbOBnxCirQAuAPYFro7d4t8xnzU655zLzBNAyJoAcnGG5a8Drsu2Teecc4XjCSCu6AwfPpyOHTvSu3fvrW2PP/44vXr1okWLFsycOTPB6pxzxcgTQLIngPST9IqkzZJOTpm3p6TnJL0R9a0in7WXE4+zcs41lCeAkDUB5B3CM2uXpVntAeB6M5siqQ3hzkrXBPr160dNTU2dtp49eyZTjHOuJHgCCFkTQGqi9joDVZTR2MrMpkTL1RtH4XFWn/E4K+dcU8vnYNYbmJWm/dtAJSGxowMwQ9JfG7Dd1kC1mV0paQLhRoyBwP7AeMILNYn2cRDhebMFkn5jZksa1ZNtdQfWSPoTsBfwPDDCzLbEF/I4K4+zKgfe5/JQ7H3O692MGWxNAAFWSqpNAJmb4/qpCSAbzWyTpLQJIACSahNAmmowawUcSRgs3wEeJZyOvDe+kJndBdwF0KNHD7vw9BObaPelobq6mlOqqhq1bk1NDa1bt6YqZf1ddtmFvn37Fu37zKqrq7epubnzPpeHYu+zJ4A0zlLgVTNbZGabgSeBrzTh9p1zzjVAPgezF4AdJZ1b25CSANJS0q6EBJB/pqxbA1RKaiFpDxJKAMliBtAuqh/gaEL+o2sCHmflnGuovJ1mNDOTdBJwq6QRwMeEQeoSoA0hAcSIEkBSbm2PJ4DMowAJIMBpRAkgwD1mNjIafCcA7YBvSrrGzHqZ2RZJlwFTo7s2ZwF357PGcuJxVs65hvIEELImgMwAumRYZwrQJ9t2nXPOFYYngLii4wkgzrmGKqvBLFMCSNJ1ubo8AcQ511BJ3JqPpC8DtxJuyd9IdC3NzLZJBomupU02s96p8xoqUwJIhhp3Bh4nPHS9BZhkZiNi808hvAXAgDlmdtr21ucCTwBxzjVUwQezYoy5yuJmM5sm6XOEmz2ONbO/SOoGXAUcYWYf5PL6F08A+YwngDjnmloSR2YlEXNlZh8B06LpTyS9wmc3g5wL3GFmH0TzV6XbhieAeAJIOfA+l4ei77OZFfQDXATckqb9O8AUoCXhKO0doBMh1WNetMyZwNjYOpOBqmjagGOj6QnAc8AOhNis2bH1FwFtCQ9ivw3skUPNu0Tr7R19f5IwUE4HXgaOqW8b3bt3t3Izbdq0Rq+7ePFi69Wr1zbtRx11lM2YMWM7qsqv7elzqfI+
l4ek+gzMtBzGlkSumWVQlDFXkloBDwO3m9miqLkV0A2oIhyt/U1SbzNbk2OtzjnnmlASdzOWWszVXcBCM7s11rYUeMrMNpnZYmABYXBzTcATQJxzDZXEkdkLwA2SzjWzu2GbmKvxQHtCzNXl1B2waoDzJbUAOpPnmCtJ1xFOSZ6TMutJYCgwTlIHQor+IlyT8AQQ51xDFXwwMyuNmCtJXYCfA/8CXonedTbWzO4BngUGRacptwCXm9n7+arFOedcdolcM7MSiLkys6VkOPUZ1fGT6OMaafjw4UyePJmOHTsyb948AFavXs2QIUOoqamhoqKCxx57jHbt2iVcqXOu2JVVAogrLumSPkaNGsWAAQNYuHAhAwYMYNSoUQlV55wrJXkdzCR9WdIjkt6S9LqkpyV1z7BshaR5+awnE0n/kfSJpC3xmCtJt8S+/1vSmmj5rpJmRe3zJZ2XRN2lrl+/frRv375O21NPPcWwYcMAGDZsGE8++WQSpTnnSkzeTjOWWNLHdwjPnC00s8pY+6W1E5IuJLxZGmA58P/MbKOkNsA8SROj06dplWMCyLhjWjd4nZUrV9KpUycAOnXqxKpVaZ9Hd865OvJ5zawkkj6iul6O9pOtP0OB/4mW/yTWviMZjnDLPQEkl8SA1KSPzZs311kn9XuxK/qUhDzwPpeHou9zLk9WN+ZDaSZ9rMvQ3pVwNNYy1rYH4YHuj4Af17dtTwBJLzXpo3v37rZs2TIzM1u2bJmV2s/NkyHKg/e5cMgxASSJG0C2Jn2Y2UqgNukjV6lJHy+a2aZouiK23FQz+6+ZfQzUJn001qnAExbSSQAwsyVm1gfYFxgmabft2L6LDB48mPHjxwMwfvx4TjzxxIQrcs6VgnwOZqWW9JHNqYRIq21YuE42HzhyO7ZfltIlfYwYMYIpU6bQrVs3pkyZwogRI+rfkHOu7OXzmlnJJH1kI6kH0A54KdbWBXjfzDZIagccAfw6oRJLVqakj6lTpxa4EudcqcvbYGZWGkkfAJJ+BZwG7CxpKXCPffbg9VDgkdjRIEBPYIwkIxxp3mxmr+WzRuecc5nlNQHESiDpI5p/BZDpvWYj07RNAfpk26ZzzrnC8QQQ55xzJa+Y3meWd5L+QXguLO57forQOedKW1kNZmZ2aNI1OOeca3p+mtE551zJU92b9Fy+SFpLeCN1OekAvJd0EQXmfS4P3ufC6Wpmu9a3UFmdZkzYAjM7OOkiCknSTO9z8+d9Lg/F3mc/zeicc67k+WDmnHOu5PlgVjh3JV1AArzP5cH7XB6Kus9+A4hzzrmS50dmzjnnSp4PZs4550qeD2YFIOkYSQskvRm9QaDZkXSfpFWS5sXa2kuaImlh9N92SdbY1CTtIWmapDckzZd0cdTebPst6fOS/ilpTtTna6L2vST9I+rzo5I+l3StTUlSS0mvSpocfW/u/a2R9Jqk2ZJmRm1F/ffaB7M8k9QSuAM4FtgfGCpp/2SryotxwDEpbSMIb/zuBkyNvjcnm4GfmllP4DDgx9GfbXPu90bgaDM7EKgEjpF0GHATcEvU5w+AsxOsMR8uBt6IfW/u/QXob2aVsWfLivrvtQ9m+fdV4E0zW2RmnwCPACcmXFOTM7O/AqtTmk8ExkfT44FvFbSoPDOz5Wb2SjS9lvDLrjPNuN8WrIu+7hB9DDgaeCJqb1Z9jl7GezxwT/RdNOP+ZlHUf699MMu/zsCS2PelUVs52M3MlkP4xQ90TLievIleLnsQ8A+aeb+jU26zgVXAFOAtYI2ZbY4WaW5/x28lvO/w0+j7l2je/YXwD5TnJM2S9IOoraj/XnucVf4pTZs/D9GMSGoD/BG4xMw+DP9wb77MbAtQKWkXYALhzevbLFbYqvJD0gnAKjObJamqtjnNos2ivzFHmNkySR2BKZL+lXRB9fEjs/xbCuwR+94FWJZQLYW2UlIngOi/qxKup8lJ2oEwkD1kZn+Kmpt9vwHMbA1QTbheuIuk2n8c
N6e/40cAgyXVEC4RHE04Umuu/QXAzJZF/11F+AfLVynyv9c+mOXfDKBbdPfT54BTgYkJ11QoE4Fh0fQw4KkEa2ly0bWTe4E3zOzXsVnNtt+Sdo2OyJC0E/B1wrXCacDJ0WLNps9mdpWZdTGzCsL/uy+Y2ek00/4CSGot6Qu108AgYB5F/vfaE0AKQNJxhH/NtQTuM7PrEy6pyUl6GKgivCZiJfA/wJPAY8CewDvAd80s9SaRkiXpa8DfgNf47HrKzwjXzZplvyX1IVz8b0n4x/BjZnatpL0JRy7tgVeBM8xsY3KVNr3oNONlZnZCc+5v1LcJ0ddWwB/M7HpJX6KI/177YOacc67k+WlG55xzJc8HM+eccyXPBzPnnHMlzwcz55xzJc8HM+eccyXPBzPnmoCkLVHCeO2nohHb2EXS+U1f3dbtDy70WxskfauZBmu7IuO35jvXBCStM7M227mNCmCymfVu4Hoto4ipohIlZNxD6NMT9S3v3PbwIzPn8iQK5B0taYakuZJ+GLW3kTRV0ivRO6Nq36IwCtgnOrIbLamq9v1Z0XpjJZ0ZTddI+qWk/wO+K2kfSc9EwbB/k7RfmnrOlDQ2mh4n6bcK72NbJOkohXfSvSFpXGyddZLGRLVOlbRr1F4p6eWoXxNq320lqVrSDZJeBK4EBgOjoz7tI+nc6OcxR9IfJe0cq+d2SX+P6jk5VsMV0c9pjqRRUVu9/XVlxsz84x//bOcH2ALMjj4TorYfAL+IpncEZgJ7EVIVvhi1dwDeJITXVgDzYtusIhzV1H4fC5wZTdcAV8TmTQW6RdOHEmKXUms8ExgbTY8jJFiI8GqPD4EDCP/AnQVURssZcHo0/cvY+nOBo6Lpa4Fbo+lq4P+L7XMccHLs+5di09cBF8aWezza//6E1yZBeA/g34Gdo+/tc+2vf8rr46n5zjWNDWZWmdI2COgTO8poC3QjhE/fIKkfIQarM7BbI/b5KGxN7f9/wOOxxP4dc1h/kpmZpNeAlWb2WrS9+YSBdXZU36PR8g8Cf5LUFtjFzF6M2scTBqI6dWXQW9J1wC5AG+DZ2LwnzexT4HVJtT+PrwP3m9lHAGa2ejv665oxH8ycyx8RjjyerdMYThXuCvQ1s01RIvvn06y/mbqXAlKXWR/9twXh/Vqpg2l9arMEP41N137P9Lshl4vs67PMGwd8y8zmRD+HqjT1wGevWVGafTa2v64Z82tmzuXPs8CPotfEIKl7lELelvCOrE2S+gNdo+XXAl+Irf82sL+kHaOjoQHpdmJmHwKLJX032o8kHdhEfWjBZ+nwpwH/Z2b/BT6QdGTU/j3gxXQrs22fvgAsj34mp+ew/+eA4bFra+3z3F9Xonwwcy5/7gFeB16RNA+4k3DE8xBwsKSZhF/o/wIws/eB6ZLmSRptZksIKeVzo3VezbKv04GzJc0B5hOugzWF9UAvSbMI7/K6NmofRrixYy5QGWtP9QhwuaRXJe0DXE14q8AUon5nY2bPEF49MlPh7daXRbPy1V9XovzWfOdcRk3xyIFzheBHZs4550qeH5k555wreX5k5pxzruT5YOacc67k+WDmnHOu5Plg5pxzruT5YOacc67k/f/2QJ0Cu1tDrQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# lgb.plot_importance opens its own figure unless it is handed an Axes, so create the\n",
    "# 12x6 figure explicitly and draw on its Axes; otherwise the requested size is ignored\n",
    "# and an extra empty figure is emitted.\n",
    "fig, ax = plt.subplots(figsize=(12, 6))\n",
    "lgb.plot_importance(gbm, ax=ax, max_num_features=20)\n",
    "ax.set_title(\"Feature importances\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "58\n"
     ]
    }
   ],
   "source": [
    "# Column positions of the features whose importance in `gbm` is greater than 5.\n",
    "index_0 = [\n",
    "    col\n",
    "    for col, importance in enumerate(gbm.feature_importances_)\n",
    "    if importance > 5\n",
    "]\n",
    "print(len(index_0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [],
   "source": [
    "#index_0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "metadata": {},
   "outputs": [],
   "source": [
    "# index_0 holds column positions taken from the model fitted on x_train_0, so the\n",
    "# selection is applied to the very matrices that model was trained/evaluated on.\n",
    "# This guarantees the kept columns line up with the importances.\n",
    "# NOTE(review): assumes x_train_0 / x_test_0 are 2-D array-likes - confirm upstream.\n",
    "x_train_1 = np.asarray(x_train_0)[:, index_0]\n",
    "x_test_1 = np.asarray(x_test_0)[:, index_0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1]\tvalid_0's l2: 12954\n",
      "Training until validation scores don't improve for 5 rounds.\n",
      "[2]\tvalid_0's l2: 11396.8\n",
      "[3]\tvalid_0's l2: 10188.5\n",
      "[4]\tvalid_0's l2: 9241.61\n",
      "[5]\tvalid_0's l2: 8305.67\n",
      "[6]\tvalid_0's l2: 7663.47\n",
      "[7]\tvalid_0's l2: 7113.97\n",
      "[8]\tvalid_0's l2: 6679.54\n",
      "[9]\tvalid_0's l2: 6423.31\n",
      "[10]\tvalid_0's l2: 6052.28\n",
      "[11]\tvalid_0's l2: 5822.09\n",
      "[12]\tvalid_0's l2: 5607.06\n",
      "[13]\tvalid_0's l2: 5391.25\n",
      "[14]\tvalid_0's l2: 5196.89\n",
      "[15]\tvalid_0's l2: 5082.16\n",
      "[16]\tvalid_0's l2: 4995.45\n",
      "[17]\tvalid_0's l2: 4929.68\n",
      "[18]\tvalid_0's l2: 4889.22\n",
      "[19]\tvalid_0's l2: 4832.65\n",
      "[20]\tvalid_0's l2: 4784.15\n",
      "[21]\tvalid_0's l2: 4743.37\n",
      "[22]\tvalid_0's l2: 4715.17\n",
      "[23]\tvalid_0's l2: 4706.44\n",
      "[24]\tvalid_0's l2: 4677.58\n",
      "[25]\tvalid_0's l2: 4608.82\n",
      "[26]\tvalid_0's l2: 4589.86\n",
      "[27]\tvalid_0's l2: 4578.49\n",
      "[28]\tvalid_0's l2: 4602.6\n",
      "[29]\tvalid_0's l2: 4581.45\n",
      "[30]\tvalid_0's l2: 4584.71\n",
      "[31]\tvalid_0's l2: 4547.39\n",
      "[32]\tvalid_0's l2: 4492.19\n",
      "[33]\tvalid_0's l2: 4467.22\n",
      "[34]\tvalid_0's l2: 4498.95\n",
      "[35]\tvalid_0's l2: 4498.9\n",
      "[36]\tvalid_0's l2: 4490.32\n",
      "[37]\tvalid_0's l2: 4525.05\n",
      "[38]\tvalid_0's l2: 4506.25\n",
      "Early stopping, best iteration is:\n",
      "[33]\tvalid_0's l2: 4467.22\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n",
       "       importance_type='split', learning_rate=0.1, max_depth=-1,\n",
       "       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n",
       "       n_estimators=100, n_jobs=-1, num_leaves=31, objective='regression',\n",
       "       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n",
       "       subsample=1.0, subsample_for_bin=200000, subsample_freq=0)"
      ]
     },
     "execution_count": 169,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same LightGBM settings, refitted on the reduced feature set (columns in index_0).\n",
    "# Early stopping again watches the L2 loss on the matching test columns.\n",
    "gbm0 = lgb.LGBMRegressor(\n",
    "    objective='regression',\n",
    "    num_leaves=31,\n",
    "    learning_rate=0.1,\n",
    "    n_estimators=100,\n",
    ")\n",
    "gbm0.fit(\n",
    "    x_train_1,\n",
    "    y_train_0,\n",
    "    eval_set=[(x_test_1, y_test_0)],\n",
    "    early_stopping_rounds=5,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 样本时间的选择\n",
    "\n",
    "* 越靠后的样本，在时间上离预测数据越近\n",
    "* 需要考虑训练商品与预测商品的重合度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the last N_RECENT_SAMPLES rows of the training data.\n",
    "# NOTE(review): assumes rows are ordered oldest -> newest, so the tail is the part\n",
    "# closest in time to the prediction window - confirm against how the rows were built.\n",
    "N_RECENT_SAMPLES = 6000\n",
    "x_train_2 = x_train_1[-N_RECENT_SAMPLES:, :]\n",
    "y_train_2 = y_train_0[-N_RECENT_SAMPLES:]\n",
    "# Features and targets must stay row-aligned after truncation.\n",
    "assert len(x_train_2) == len(y_train_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 214,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1]\tvalid_0's l2: 12806.7\n",
      "Training until validation scores don't improve for 5 rounds.\n",
      "[2]\tvalid_0's l2: 11183\n",
      "[3]\tvalid_0's l2: 9842.54\n",
      "[4]\tvalid_0's l2: 8795.39\n",
      "[5]\tvalid_0's l2: 7942.59\n",
      "[6]\tvalid_0's l2: 7271.91\n",
      "[7]\tvalid_0's l2: 6736.64\n",
      "[8]\tvalid_0's l2: 6259.37\n",
      "[9]\tvalid_0's l2: 5910.09\n",
      "[10]\tvalid_0's l2: 5634.49\n",
      "[11]\tvalid_0's l2: 5355.84\n",
      "[12]\tvalid_0's l2: 5136.29\n",
      "[13]\tvalid_0's l2: 4952.22\n",
      "[14]\tvalid_0's l2: 4800.07\n",
      "[15]\tvalid_0's l2: 4681.35\n",
      "[16]\tvalid_0's l2: 4606.04\n",
      "[17]\tvalid_0's l2: 4529.26\n",
      "[18]\tvalid_0's l2: 4438.65\n",
      "[19]\tvalid_0's l2: 4404.39\n",
      "[20]\tvalid_0's l2: 4355.41\n",
      "[21]\tvalid_0's l2: 4282.96\n",
      "[22]\tvalid_0's l2: 4268.11\n",
      "[23]\tvalid_0's l2: 4252.97\n",
      "[24]\tvalid_0's l2: 4241.44\n",
      "[25]\tvalid_0's l2: 4233.44\n",
      "[26]\tvalid_0's l2: 4240.95\n",
      "[27]\tvalid_0's l2: 4250.5\n",
      "[28]\tvalid_0's l2: 4244.43\n",
      "[29]\tvalid_0's l2: 4230.46\n",
      "[30]\tvalid_0's l2: 4216.7\n",
      "[31]\tvalid_0's l2: 4200.04\n",
      "[32]\tvalid_0's l2: 4189.85\n",
      "[33]\tvalid_0's l2: 4185.05\n",
      "[34]\tvalid_0's l2: 4185.09\n",
      "[35]\tvalid_0's l2: 4192\n",
      "[36]\tvalid_0's l2: 4205.65\n",
      "[37]\tvalid_0's l2: 4223.71\n",
      "[38]\tvalid_0's l2: 4214.49\n",
      "Early stopping, best iteration is:\n",
      "[33]\tvalid_0's l2: 4185.05\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n",
       "       importance_type='split', learning_rate=0.1, max_depth=-1,\n",
       "       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n",
       "       n_estimators=100, n_jobs=-1, num_leaves=31, objective='regression',\n",
       "       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,\n",
       "       subsample=1.0, subsample_for_bin=200000, subsample_freq=0)"
      ]
     },
     "execution_count": 214,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same LightGBM settings, refitted on x_train_2 / y_train_2 (the tail slice of the\n",
    "# reduced training set) and validated on the reduced test columns.\n",
    "gbm1 = lgb.LGBMRegressor(\n",
    "    objective='regression',\n",
    "    num_leaves=31,\n",
    "    learning_rate=0.1,\n",
    "    n_estimators=100,\n",
    ")\n",
    "gbm1.fit(\n",
    "    x_train_2,\n",
    "    y_train_2,\n",
    "    eval_set=[(x_test_1, y_test_0)],\n",
    "    early_stopping_rounds=5,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3000"
      ]
     },
     "execution_count": 178,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row count of the truncated training matrix.\n",
    "x_train_2.shape[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 其他模型\n",
    "\n",
    "* sklearn大部分模型的速度都比lightgbm慢很多，因此下面多数直接用样本与特征数相对少的x_train_2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### gbdt\n",
    "\n",
    "* 上面的优化对于gbdt也是有用的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5118.940822179315\n"
     ]
    }
   ],
   "source": [
    "# scikit-learn gradient boosting on x_train_0 / y_train_0.\n",
    "# random_state is fixed so the reported score is reproducible across runs.\n",
    "model_gbr = GradientBoostingRegressor(learning_rate=0.1, n_estimators=50, random_state=0)\n",
    "model_gbr.fit(x_train_0, y_train_0)\n",
    "y_pred = model_gbr.predict(x_test_0)\n",
    "# mean_squared_error expects (y_true, y_pred); MSE is symmetric, so the value is unaffected.\n",
    "print(mean_squared_error(y_test_0, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4455.500212482618\n"
     ]
    }
   ],
   "source": [
    "# Same gradient-boosting settings, trained on the reduced data x_train_2 / y_train_2\n",
    "# and scored on the matching reduced test columns x_test_1.\n",
    "# random_state is fixed so the reported score is reproducible across runs.\n",
    "model_gbr = GradientBoostingRegressor(learning_rate=0.1, n_estimators=50, random_state=0)\n",
    "model_gbr.fit(x_train_2, y_train_2)\n",
    "y_pred = model_gbr.predict(x_test_1)\n",
    "# mean_squared_error expects (y_true, y_pred); MSE is symmetric, so the value is unaffected.\n",
    "print(mean_squared_error(y_test_0, y_pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### svm，LinearRegression，LogisticRegression\n",
    "\n",
    "* 多数模型需要先对数据做标准化\n",
    "* 传统模型多数比不上规则"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15236.961072048143\n"
     ]
    }
   ],
   "source": [
    "from sklearn import svm\n",
    "\n",
    "# Support-vector regression on the raw (unscaled) reduced features.\n",
    "model_SVR = svm.SVR(gamma=\"scale\").fit(x_train_2, y_train_2)\n",
    "y_pred = model_SVR.predict(x_test_1)\n",
    "# Test-set mean squared error, arguments in (y_true, y_pred) order.\n",
    "print(mean_squared_error(y_test_0, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Fit the scaler on the training rows only and reuse its mean/std for the test rows,\n",
    "# so both sets are expressed on the same scale. Fitting a second scaler on the test\n",
    "# set would standardise it with different statistics than the models were trained on.\n",
    "scaler = StandardScaler().fit(x_train_2)\n",
    "x_train_2_s = scaler.transform(x_train_2)\n",
    "x_test_1_s = scaler.transform(x_test_1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11976.229755269243\n"
     ]
    }
   ],
   "source": [
    "# Support-vector regression again, this time on the standardised features.\n",
    "model_SVR = svm.SVR(gamma=\"scale\").fit(x_train_2_s, y_train_2)\n",
    "y_pred = model_SVR.predict(x_test_1_s)\n",
    "# Test-set mean squared error, arguments in (y_true, y_pred) order.\n",
    "print(mean_squared_error(y_test_0, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 206,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "33837.99659085688\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "# Ordinary least-squares regression (with intercept) on the standardised features.\n",
    "model_liner = LinearRegression(fit_intercept=True).fit(x_train_2_s, y_train_2)\n",
    "y_pred = model_liner.predict(x_test_1_s)\n",
    "# Test-set mean squared error, arguments in (y_true, y_pred) order.\n",
    "print(mean_squared_error(y_test_0, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 213,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10884.884287454324\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# Define the logistic regression model.\n",
    "# NOTE(review): LogisticRegression is a classifier, so every distinct value in y_train_2\n",
    "# is treated as a class label and predictions can only be values seen during training.\n",
    "# Confirm this is intended for a quantity target.\n",
    "model_lr = LogisticRegression(solver='liblinear', multi_class='auto').fit(x_train_2_s, y_train_2)\n",
    "y_pred = model_lr.predict(x_test_1_s)\n",
    "# Test-set mean squared error, arguments in (y_true, y_pred) order.\n",
    "print(mean_squared_error(y_test_0, y_pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 请尝试套用“集成学习之Stacking的实现”的内容在这里实现多模型的集成学习"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
